-
Notifications
You must be signed in to change notification settings - Fork 33
Expand file tree
/
Copy pathdandiapi.py
More file actions
2209 lines (1931 loc) · 80.6 KB
/
dandiapi.py
File metadata and controls
2209 lines (1931 loc) · 80.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""REST API client for interacting with DANDI Archive instances.
This module provides client classes for communicating with DANDI Archive API
servers, including asset management, dandiset operations, and authentication.
The main classes are:
- DandiAPIClient: High-level client for DANDI API operations
- RESTFullAPIClient: Base HTTP client with retry and authentication
- RemoteDandiset: Represents a dandiset on the server
- RemoteAsset: Represents an asset (file) on the server
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from collections.abc import Callable, Iterable, Iterator, Sequence
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from fnmatch import fnmatchcase
import json
import os.path
from pathlib import Path, PurePosixPath
import posixpath
import re
from time import sleep, time
from types import TracebackType
from typing import TYPE_CHECKING, Any, Dict, List, Optional
import click
from dandischema import models
import dandischema.consts
from packaging.version import Version as PackagingVersion
from pydantic import BaseModel, Field, PrivateAttr
import requests
import tenacity
from yarl import URL
from . import get_logger
from .consts import (
DOWNLOAD_TIMEOUT,
DRAFT,
MAX_CHUNK_SIZE,
REQUEST_RETRIES,
RETRY_STATUSES,
ZARR_DELETE_BATCH_SIZE,
DandiInstance,
EmbargoStatus,
)
from .exceptions import HTTP404Error, NotFoundError, SchemaVersionError
from .keyring_utils import keyring_lookup, keyring_save
from .misctypes import Digest, RemoteReadableAsset
from .utils import (
USER_AGENT,
check_dandi_version,
chunked,
ensure_datetime,
get_instance,
get_retry_after,
is_interactive,
is_page2_url,
joinurl,
)
if TYPE_CHECKING:
from typing_extensions import Self
from .apicache import APIMetadataCache
lgr = get_logger()
class AssetType(Enum):
    """
    .. versionadded:: 0.36.0

    Enumeration of the kinds of resource that can hold an asset's actual
    data.
    """

    BLOB = 1
    ZARR = 2
class VersionStatus(Enum):
    """Enumeration of the status strings a Dandiset version can carry."""

    PENDING = "Pending"
    VALIDATING = "Validating"
    VALID = "Valid"
    INVALID = "Invalid"
    PUBLISHING = "Publishing"
    PUBLISHED = "Published"
# The following class is loosely based on GirderClient, with authentication
# etc. stripped out.
# TODO: add copyright/license info
class RESTFullAPIClient:
    """
    Base class for a JSON-based HTTP(S) client for interacting with a given
    base API URL.

    All request methods can take either an absolute URL or a slash-separated
    path; in the latter case, the path is appended to the base API URL
    (separated by a slash) in order to determine the actual URL to make the
    request of.

    `RESTFullAPIClient` instances are usable as context managers, in which case
    they will close their associated session on exit.
    """

    def __init__(
        self,
        api_url: str,
        session: requests.Session | None = None,
        headers: dict | None = None,
    ) -> None:
        """
        :param str api_url: The base HTTP(S) URL to prepend to request paths
        :param session: an optional `requests.Session` instance to use; if not
            specified, a new session is created
        :param headers: an optional `dict` of headers to send in every request
        """
        self.api_url = api_url
        if session is None:
            session = requests.Session()
        session.headers["User-Agent"] = USER_AGENT
        if headers is not None:
            session.headers.update(headers)
        self.session = session
        #: Default number of items to request per page when paginating (`None`
        #: means to use the server's default)
        self.page_size: int | None = None
        #: How many pages to fetch at once when parallelizing pagination
        self.page_workers: int = 5

    def __enter__(self) -> Self:
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        # Leaving the ``with`` block releases the underlying session
        self.session.close()

    def request(
        self,
        method: str,
        path: str,
        params: dict | None = None,
        data: Any = None,
        files: dict | None = None,
        json: Any = None,
        headers: dict | None = None,
        json_resp: bool = True,
        retry_statuses: Sequence[int] = (),
        retry_if: Callable[[requests.Response], Any] | None = None,
        **kwargs: Any,
    ) -> Any:
        """
        This method looks up the appropriate method, constructs a request URL
        from the base URL, path, and parameters, and then sends the request. If
        the method is unknown or if the path is not found, an exception is
        raised; otherwise, a JSON object is returned with the response.

        This is a convenience method to use when making basic requests that do
        not involve multipart file data that might need to be specially encoded
        or handled differently.

        The ``headers`` mapping supplied by the caller is copied before the
        default ``accept`` header is added, so the caller's `dict` is never
        modified.

        :param method: The HTTP method to use in the request (GET, POST, etc.)
        :type method: str
        :param path: A string containing the path elements for this request
        :type path: str
        :param params: A dictionary mapping strings to strings, to be used
            as the key/value pairs in the request parameters.
        :type params: dict
        :param data: A dictionary, bytes or file-like object to send in the
            body.
        :param files: A dictionary of 'name' => file-like-objects for multipart
            encoding upload.
        :type files: dict
        :param json: A JSON object to send in the request body.
        :type json: dict
        :param headers: If present, a dictionary of headers to encode in the
            request.
        :type headers: dict
        :param json_resp: Whether the response should be parsed as JSON. If
            False, the raw response object is returned. To get the raw binary
            content of the response, use the ``content`` attribute of the
            return value, e.g.

            .. code-block:: python

                resp = client.get('my/endpoint', json_resp=False)
                print(resp.content)  # Raw binary content
                print(resp.headers)  # Dict of headers

        :type json_resp: bool
        :param retry_statuses: a sequence of HTTP response status codes to
            retry in addition to `dandi.consts.RETRY_STATUSES`
        :param retry_if: an optional predicate applied to a failed HTTP
            response to test whether to retry
        :raises HTTP404Error: if the final response has status 404
        :raises requests.HTTPError: if the final response has any other
            non-OK status
        """
        url = self.get_url(path)
        # Copy so that adding the default "accept" header below cannot leak
        # into a dict owned by the caller
        headers = {} if headers is None else dict(headers)
        if json_resp and "accept" not in headers:
            headers["accept"] = "application/json"
        lgr.debug("%s %s", method.upper(), url)
        try:
            for i, attempt in enumerate(
                tenacity.Retrying(
                    wait=tenacity.wait_exponential(exp_base=1.25, multiplier=1.25),
                    # urllib3's ConnectionPool isn't thread-safe, so we
                    # sometimes hit ConnectionErrors on the start of an upload.
                    # Retry when this happens.
                    # Cf. <https://github.com/urllib3/urllib3/issues/951>.
                    retry=tenacity.retry_if_exception_type(
                        (requests.ConnectionError, requests.HTTPError)
                    ),
                    stop=tenacity.stop_after_attempt(REQUEST_RETRIES),
                    reraise=True,
                )
            ):
                with attempt:
                    result = self.session.request(
                        method,
                        url,
                        params=params,
                        data=data,
                        files=files,
                        json=json,
                        headers=headers,
                        **kwargs,
                    )
                    if result.status_code in [*RETRY_STATUSES, *retry_statuses] or (
                        retry_if is not None and retry_if(result)
                    ):
                        if attempt.retry_state.attempt_number < REQUEST_RETRIES:
                            lgr.warning(
                                "Will retry: Error %d while sending %s request to %s: %s",
                                result.status_code,
                                method,
                                url,
                                result.text,
                            )
                        # Rewind a file-like body so that a retry resends it
                        # from the start
                        if data is not None and hasattr(data, "seek"):
                            data.seek(0)
                        if retry_after := get_retry_after(result):
                            lgr.debug(
                                "Sleeping for %d seconds as instructed in response "
                                "(in addition to tenacity imposed)",
                                retry_after,
                            )
                            sleep(retry_after)
                        # Raising HTTPError here is what makes tenacity retry
                        result.raise_for_status()
        except Exception as e:
            if isinstance(e, requests.HTTPError):
                # %s rather than %d for the status: the "?" fallback is a
                # string and would otherwise break log formatting
                lgr.error(
                    "HTTP request failed repeatedly: Error %s while sending %s request to %s: %s",
                    e.response.status_code if e.response is not None else "?",
                    method,
                    url,
                    e.response.text if e.response is not None else "?",
                )
            else:
                lgr.exception("HTTP connection failed")
            raise
        if i > 0:
            lgr.info(
                "%s %s succeeded after %d retr%s",
                method.upper(),
                url,
                i,
                "y" if i == 1 else "ies",
            )
        lgr.debug("Response: %d", result.status_code)
        # If success, return the json object. Otherwise throw an exception.
        if not result.ok:
            msg = f"Error {result.status_code} while sending {method} request to {url}"
            if result.status_code == 409:
                # Blob exists on server; log at DEBUG level
                lgr.debug("%s: %s", msg, result.text)
            else:
                lgr.error("%s: %s", msg, result.text)
            if len(result.text) <= 1024:
                msg += f": {result.text}"
            else:
                msg += f": {result.text[:1024]}... [{len(result.text)}-char response truncated]"
            if result.status_code == 404:
                raise HTTP404Error(msg, response=result)
            else:
                raise requests.HTTPError(msg, response=result)
        if json_resp:
            if result.text.strip():
                return result.json()
            else:
                return None
        else:
            return result

    def get_url(self, path: str) -> str:
        """
        Append a slash-separated ``path`` to the instance's base URL.  The two
        components are separated by a single slash, removing any excess slashes
        that would be present after naïve concatenation.

        If ``path`` is already an absolute URL, it is returned unchanged.
        """
        return joinurl(self.api_url, path)

    def get(self, path: str, **kwargs: Any) -> Any:
        """
        Convenience method to call `request()` with the 'GET' HTTP method.
        """
        return self.request("GET", path, **kwargs)

    def post(self, path: str, **kwargs: Any) -> Any:
        """
        Convenience method to call `request()` with the 'POST' HTTP method.
        """
        return self.request("POST", path, **kwargs)

    def put(self, path: str, **kwargs: Any) -> Any:
        """
        Convenience method to call `request()` with the 'PUT' HTTP method.
        """
        return self.request("PUT", path, **kwargs)

    def delete(self, path: str, **kwargs: Any) -> Any:
        """
        Convenience method to call `request()` with the 'DELETE' HTTP method.
        """
        return self.request("DELETE", path, **kwargs)

    def patch(self, path: str, **kwargs: Any) -> Any:
        """
        Convenience method to call `request()` with the 'PATCH' HTTP method.
        """
        return self.request("PATCH", path, **kwargs)

    def paginate(
        self,
        path: str,
        page_size: int | None = None,
        params: dict | None = None,
    ) -> Iterator:
        """
        Paginate through the resources at the given path: GET the path, yield
        the values in the ``"results"`` key, and repeat with the URL in the
        ``"next"`` key until it is ``null``.

        If the first ``"next"`` key is the same as the initially-requested URL
        but with the ``page`` query parameter set to ``2``, then the remaining
        pages are fetched concurrently in separate threads, `page_workers`
        (default 5) at a time.  This behavior requires the initial response to
        contain a ``"count"`` key giving the number of items across all pages.

        The ``params`` mapping supplied by the caller is never modified.

        :param page_size:
            If non-`None`, overrides the client's `page_size` attribute for
            this sequence of pages
        :param params:
            optional query parameters to send with every page request
        :raises RuntimeError:
            if the server's ``"next"`` URL is not the expected "page 2" URL
            and the :envvar:`DANDI_PAGINATION_DISABLE_FALLBACK` environment
            variable is set
        """
        if page_size is None:
            page_size = self.page_size
        if page_size is not None:
            # Build a new dict instead of writing into the caller's one
            params = {**(params or {}), "page_size": page_size}

        resp = self.get(path, params=params, json_resp=False)
        r = resp.json()
        if r["next"] is not None:
            # If the request was redirected, compare against the URL that was
            # originally requested
            page1 = resp.history[0].url if resp.history else resp.url
            if not is_page2_url(page1, r["next"]):
                if os.environ.get("DANDI_PAGINATION_DISABLE_FALLBACK"):
                    raise RuntimeError(
                        f"API server changed pagination strategy: {page1} URL"
                        f" is now followed by {r['next']}"
                    )
                else:
                    # Fallback: follow the "next" links one page at a time
                    while True:
                        yield from r["results"]
                        if r.get("next"):
                            r = self.get(r["next"])
                        else:
                            return
        yield from r["results"]
        if r["next"] is None:
            return
        if page_size is None:
            # No explicit size: take the length of the first page as the size
            page_size = len(r["results"])
        # Ceiling division: number of pages needed to hold ``count`` items
        pages = (r["count"] + page_size - 1) // page_size

        def get_page(pageno: int) -> list:
            params2 = params.copy() if params is not None else {}
            params2["page"] = pageno
            results = self.get(path, params=params2)["results"]
            assert isinstance(results, list)
            return results

        with ThreadPoolExecutor(max_workers=self.page_workers) as pool:
            futures = [pool.submit(get_page, i) for i in range(2, pages + 1)]
            try:
                # Consume in submission order so that results stay ordered
                for f in futures:
                    yield from f.result()
            finally:
                # Ask any not-yet-started fetches to stop when the consumer
                # quits early or a page fails
                for f in futures:
                    f.cancel()
class DandiAPIClient(RESTFullAPIClient):
    """A client for interacting with a DANDI API server"""

    def __init__(
        self,
        api_url: str | None = None,
        token: str | None = None,
        dandi_instance: DandiInstance | None = None,
        cache: bool = False,
    ) -> None:
        """
        Construct a client instance for the given API URL or DANDI instance
        (mutually exclusive options).  If no URL or instance is supplied, the
        instance specified by the :envvar:`DANDI_INSTANCE` environment variable
        (default value: ``"dandi"``) is used.

        :param str api_url: Base API URL of the server to interact with.

            - For DANDI production, use ``"https://api.dandiarchive.org/api"``
            - For DANDI sandbox, use
              ``"https://api.sandbox.dandiarchive.org/api"``

        :param str token: User API Key.  Note that different instance APIs have
            different keys.
        :param dandi_instance: The `DandiInstance` to connect to, as an
            alternative to ``api_url``
        :param bool cache: When ``True``, API metadata responses are cached
            persistently to disk (in an sqlite3 database) and validated against
            ``modified`` timestamps.  Controlled by the :envvar:`DANDI_CACHE`
            environment variable (``"ignore"`` disables, ``"clear"`` wipes the
            cache on first access).
        :raises ValueError:
            if both ``api_url`` and ``dandi_instance`` are given
        """
        check_dandi_version()
        if api_url is None:
            if dandi_instance is None:
                instance_name = os.environ.get("DANDI_INSTANCE", "dandi")
                dandi_instance = get_instance(instance_name)
            api_url = dandi_instance.api
        elif dandi_instance is not None:
            raise ValueError(
                "api_url and dandi_instance are mutually exclusive. "
                "Use either 'api_url' to specify a custom API URL, "
                "or 'dandi_instance' to use a registered DANDI instance, but not both."
            )
        else:
            dandi_instance = get_instance(api_url)
        super().__init__(api_url)
        self.dandi_instance: DandiInstance = dandi_instance
        if token is not None:
            self.authenticate(token)
        if cache:
            # Imported only when caching is actually requested
            from .apicache import APIMetadataCache

            self._cache: APIMetadataCache | None = APIMetadataCache()
        else:
            self._cache = None

    @property
    def cache(self) -> APIMetadataCache | None:
        """The persistent API metadata cache, or ``None`` if caching is disabled."""
        return self._cache

    @classmethod
    def for_dandi_instance(
        cls,
        instance: str | DandiInstance,
        token: str | None = None,
        authenticate: bool = False,
    ) -> DandiAPIClient:
        """
        Construct a client instance for the server identified by ``instance``
        (either the name of a registered DANDI instance or a
        `DandiInstance` instance) and an optional authentication token/API key.
        If no token is supplied and ``authenticate`` is true,
        `dandi_authenticate()` is called on the instance before returning it.
        """
        client = cls(dandi_instance=get_instance(instance), token=token)
        if token is None and authenticate:
            client.dandi_authenticate()
        return client

    def authenticate(self, token: str, save_to_keyring: bool = False) -> None:
        """
        Set the authentication token/API key used by the `DandiAPIClient`.
        Before setting the token, a test request to ``/auth/token`` is made to
        check the token's validity; if it fails, a `requests.HTTPError` is
        raised.

        If ``save_to_keyring`` is true, then (after querying ``/auth/token``
        but before setting the API key used by the client), the token is saved
        in the user's keyring at the same location as used by
        `dandi_authenticate()`.

        .. versionchanged:: 0.53.0

            ``save_to_keyring`` added
        """
        # Fails if token is invalid:
        self.get("/auth/token", headers={"Authorization": f"token {token}"})
        if save_to_keyring:
            keyring_save(self._get_keyring_ids()[1], "key", token)
            lgr.debug("Stored key in keyring")
        self.session.headers["Authorization"] = f"token {token}"

    def dandi_authenticate(self) -> None:
        """
        Acquire and set the authentication token/API key used by the
        `DandiAPIClient`.

        If the :envvar:`{INSTANCE_NAME}_API_KEY` environment variable is set, its value
        is used as the token.  Here, ``{INSTANCE_NAME}`` is the uppercased instance name
        with hyphens replaced by underscores.  Otherwise, the token is looked up in the
        user's keyring under the service ":samp:`dandi-api-{self.dandi_instance.name}`"
        [#auth]_ and username "``key``".  If no token is found there, the user is
        prompted for the token, and, if it proves to be valid, it is stored in the
        user's keyring.

        .. [#auth] E.g., "``dandi-api-dandi``" for the production server or
           "``dandi-api-dandi-sandbox``" for the sandbox server
        """
        # Shortcut for advanced folks
        env_var_name = self.api_key_env_var
        api_key = os.environ.get(env_var_name, None)
        if api_key:
            lgr.debug(
                "Using `%s` environment variable as the API key", env_var_name
            )
            self.authenticate(api_key)
            return

        client_name, app_id = self._get_keyring_ids()
        keyring_backend, api_key = keyring_lookup(app_id, "key")
        key_from_keyring = api_key is not None
        while True:
            if not api_key:
                api_key = input(f"Please provide API Key for {client_name}: ")
                key_from_keyring = False
            try:
                lgr.debug(
                    "Using API key from %s",
                    {True: "keyring", False: "user input"}[key_from_keyring],
                )
                self.authenticate(api_key)
            except requests.HTTPError:
                if is_interactive() and click.confirm(
                    "API key is invalid; enter another?"
                ):
                    # Clearing the key makes the next iteration prompt again
                    api_key = None
                    continue
                else:
                    raise
            else:
                # Only keys typed in by the user need to be stored
                if not key_from_keyring:
                    keyring_backend.set_password(app_id, "key", api_key)
                    lgr.debug("Stored key in keyring")
                break

    def _get_keyring_ids(self) -> tuple[str, str]:
        """
        Return the instance name and the keyring service name
        (``dandi-api-<instance name>``) under which the API key is stored.
        """
        client_name = self.dandi_instance.name
        return (client_name, f"dandi-api-{client_name}")

    @property
    def _instance_id(self) -> str:
        """The uppercased name of the associated DANDI instance"""
        return self.dandi_instance.name.upper()

    def get_dandiset(
        self, dandiset_id: str, version_id: str | None = None, lazy: bool = True
    ) -> RemoteDandiset:
        """
        Fetches the Dandiset with the given ``dandiset_id``.  If ``version_id``
        is not specified, the `RemoteDandiset`'s version is set to the most
        recent published version if there is one, otherwise to the draft
        version.

        If ``lazy`` is true, no requests are actually made until any data is
        requested from the `RemoteDandiset`.

        :raises NotFoundError:
            if ``lazy`` is false and the server reports that the Dandiset does
            not exist
        """
        if lazy:
            return RemoteDandiset(self, dandiset_id, version_id)
        else:
            try:
                d = RemoteDandiset.from_data(
                    self, self.get(f"/dandisets/{dandiset_id}/")
                )
            except HTTP404Error as e:
                # Chain explicitly so the 404 is reported as the cause
                raise NotFoundError(
                    f"No such Dandiset: {dandiset_id!r}. "
                    "Verify the Dandiset ID is correct and that you have access. "
                ) from e
            if version_id is not None and version_id != d.version_id:
                if version_id == DRAFT:
                    return d.for_version(d.draft_version)
                else:
                    return d.for_version(version_id)
            return d

    def get_dandisets(
        self,
        *,
        draft: bool | None = None,
        embargoed: bool | None = None,
        empty: bool | None = None,
        mine: bool | None = None,
        order: str | None = None,
        search: str | None = None,
    ) -> Iterator[RemoteDandiset]:
        """
        Returns a generator of all Dandisets on the server.  For each Dandiset,
        the `RemoteDandiset`'s version is set to the most recent published
        version if there is one, otherwise to the draft version.

        .. versionchanged:: 0.61.0

            ``draft``, ``embargoed``, ``empty``, ``mine``, ``order``, and
            ``search`` parameters added

        :param draft:
            If true, Dandisets that have only draft versions (i.e., that
            haven't yet been published) will be included in the results
            (default true)
        :param embargoed:
            If true, embargoed Dandisets will be included in the results
            (default false)
        :param empty:
            If true, empty Dandisets will be included in the results (default
            true)
        :param mine:
            If true, only Dandisets owned by the authenticated user will be
            retrieved (default false)
        :param order:
            The field to sort the results by.  The accepted field names are
            ``"id"``, ``"name"``, ``"modified"``, and ``"size"``.  Prepend a
            hyphen to the field name to reverse the sort order.
        :param search:
            A search string to filter the returned Dandisets by.  The string is
            searched for in the metadata of Dandiset versions.
        """
        for data in self.paginate(
            "/dandisets/",
            params={
                "draft": draft,
                "embargoed": embargoed,
                "empty": empty,
                "ordering": order,
                "search": search,
                "user": "me" if mine else None,
            },
        ):
            yield RemoteDandiset.from_data(self, data)

    def create_dandiset(
        self, name: str, metadata: dict[str, Any], *, embargo: bool = False
    ) -> RemoteDandiset:
        """
        Creates a Dandiset with the given name & metadata.  If ``embargo`` is
        `True`, the resulting Dandiset will be embargoed.

        .. versionchanged:: 0.61.0

            ``embargo`` argument added
        """
        return RemoteDandiset.from_data(
            self,
            self.post(
                "/dandisets/",
                json={"name": name, "metadata": metadata},
                params={"embargo": "true" if embargo else "false"},
            ),
        )

    def check_schema_version(self, schema_version: str | None = None) -> None:
        """
        Confirms that the given schema version at the client is "compatible" with the server.

        Compatibility here means that the server's schema version can be either

        - lower than client has, but within the same MAJOR.MINOR component of the version
          number for 0.x series, and same MAJOR version for/after 1.x series;
        - the same;
        - higher than the client has, but only if the client's schema version is listed
          among the server's `allowed_schema_versions` (as returned by the `/info` API endpoint),
          or if not there -- `dandischema.consts.ALLOWED_INPUT_SCHEMAS` is consulted.

        If neither of above, a `SchemaVersionError` is raised.

        :param schema_version: the schema version to be confirmed for compatibility with the server;
            if not set, the schema version for the installed ``dandischema`` library is used.
        :raises RuntimeError: if the server's ``/info/`` response has no ``schema_version``
        :raises SchemaVersionError: if the two schema versions are not compatible
        """
        if schema_version is None:
            schema_version = models.get_schema_version()
        server_info = self.get("/info/")
        server_schema_version = server_info.get("schema_version")
        if not server_schema_version:
            raise RuntimeError(
                "Server did not provide schema_version in /info/;"
                f" returned {server_info!r}"
            )
        server_ver, our_ver = PackagingVersion(server_schema_version), PackagingVersion(
            schema_version
        )
        if server_ver > our_ver:
            # TODO: potentially adjust here if name would be different: see
            # https://github.com/dandi/dandi-archive/issues/2624
            allowed_schema_versions = server_info.get(
                "allowed_schema_versions", dandischema.consts.ALLOWED_INPUT_SCHEMAS
            )
            if schema_version not in allowed_schema_versions:
                raise SchemaVersionError(
                    f"Server uses schema version {server_schema_version};"
                    f" client only supports prior {schema_version} and it"
                    f" is not among any of the allowed upgradable schema versions"
                    f" ({', '.join(allowed_schema_versions)}) . You may need to"
                    " upgrade dandi and/or dandischema."
                )
            # TODO: check current server behavior which is likely to just not care!
            # So that is where server might need to provide support for upgrades upon
            # providing metadata.
        elif (
            server_ver.major == 0 and server_ver.release[:2] != our_ver.release[:2]
        ) or (
            server_ver.major != our_ver.major
        ):  # MAJOR, MINOR within 0.x.y and MAJOR within 1.x.y
            raise SchemaVersionError(
                f"Server uses older incompatible schema version {server_schema_version};"
                f" client supports {schema_version}."
            )
        elif server_ver < our_ver:
            # Compatible older server version -- all good, but inform the user
            # TODO: potentially downgrade the record to match the schema,
            # see https://github.com/dandi/dandi-schema/issues/343
            lgr.warning(
                "Server uses schema version %s older than client's %s (dandischema library %s). "
                "Server might fail to validate such assets and you might not be able to "
                "publish this dandiset until server is upgraded. "
                "Alternatively, you may downgrade dandischema and reupload.",
                server_ver,
                our_ver,
                dandischema.__version__,
            )

    def get_asset(self, asset_id: str) -> BaseRemoteAsset:
        """
        Fetch the asset with the given asset ID.  If the given asset does not
        exist, a `NotFoundError` is raised.

        The returned object will not have any information about the Dandiset
        associated with the asset; for that, the `RemoteDandiset.get_asset()`
        method must be used instead.
        """
        try:
            info = self.get(f"/assets/{asset_id}/info/")
        except HTTP404Error as e:
            # Chain explicitly so the 404 is reported as the cause
            raise NotFoundError(
                f"No such asset: {asset_id!r}. "
                "Verify the asset ID is correct. "
                "Use 'dandi ls' to list available assets."
            ) from e
        metadata = info.pop("metadata", None)
        return BaseRemoteAsset.from_base_data(self, info, metadata)

    @property
    def api_key_env_var(self) -> str:
        """
        Get the name of the environment variable that can be used to specify the
        API key for the associated DANDI instance.
        """
        return f"{self.dandi_instance.name.upper().replace('-', '_')}_API_KEY"
# `arbitrary_types_allowed` is needed for `client: DandiAPIClient`
class APIBase(BaseModel, populate_by_name=True, arbitrary_types_allowed=True):
    """
    Common pydantic base for the API object models in this module.

    Apart from `json_dict()`, everything about this class is an
    implementation detail and should not be relied upon.
    """

    def json_dict(self) -> dict[str, Any]:
        """
        Dump the model to a JSON-compatible `dict`, omitting the ``client``
        attribute and keying fields by their aliases, i.e. by the same names
        as used in the API
        """
        dump_options: dict[str, Any] = {"mode": "json", "by_alias": True}
        return self.model_dump(**dump_options)
class Version(APIBase):
    """
    Version information for a Dandiset, as retrieved from the API.

    Calling `str()` on a `Version` gives its identifier.

    End users should not construct this class themselves; obtain instances
    from the relevant attributes & methods of `RemoteDandiset` instead.
    """

    #: Identifier of the version (populated from the ``version`` alias)
    identifier: str = Field(alias="version")
    #: Name of the version
    name: str
    #: Number of assets contained in the version
    asset_count: int
    #: Combined size, in bytes, of all assets in the version
    size: int
    #: Timestamp of the version's creation
    created: datetime
    #: Timestamp of the version's most recent modification
    modified: datetime
    #: Status of the version (see `VersionStatus`)
    status: VersionStatus

    def __str__(self) -> str:
        return self.identifier
class RemoteValidationError(APIBase):
    """
    .. versionadded:: 0.49.0

    A validation error record as reported by a server.  This is distinct from
    :class:`dandi.validate_types.ValidationResult`, which gives a richer
    representation of validation errors.
    """

    #: Name of the field the error refers to
    field: str
    #: The error message
    message: str
class RemoteAssetValidationError(RemoteValidationError):
    """A `RemoteValidationError` that may additionally carry a path."""

    #: Path associated with the error, or `None` when not provided
    path: Optional[str] = None
class VersionInfo(Version):
    """
    .. versionadded:: 0.49.0

    A `Version` extended with the validation errors recorded for the
    Dandiset version
    """

    #: Validation errors for the assets of the version
    asset_validation_errors: List[RemoteAssetValidationError]
    #: Validation errors for the version itself
    version_validation_errors: List[RemoteValidationError]
class RemoteDandisetData(APIBase):
    """
    Container for the data of a Dandiset as retrieved from the API.

    This is an implementation detail; third parties should not use this class.
    """

    #: The Dandiset identifier
    identifier: str
    #: Timestamp of the Dandiset's creation
    created: datetime
    #: Timestamp of the Dandiset's most recent modification
    modified: datetime
    #: The contact person for the Dandiset
    contact_person: str
    #: The embargo status of the Dandiset
    embargo_status: EmbargoStatus
    #: The most recent published version, or `None` if absent
    most_recent_published_version: Optional[Version] = None
    #: The draft version of the Dandiset
    draft_version: Version
class RemoteDandiset:
"""
Representation of a Dandiset (as of a certain version) retrieved from the
API.
Stringifying a `RemoteDandiset` returns a string of the form
:samp:`"{server_id}:{dandiset_id}/{version_id}"`.
This class should not be instantiated by end-users directly. Instead,
instances should be retrieved from the appropriate attributes & methods of
`DandiAPIClient` and `RemoteDandiset`.
"""
def __init__(
self,
client: DandiAPIClient,
identifier: str,
version: str | Version | None = None,
data: dict[str, Any] | RemoteDandisetData | None = None,
) -> None:
#: The `DandiAPIClient` instance that returned this `RemoteDandiset`
#: and which the latter will use for API requests
self.client: DandiAPIClient = client
#: The Dandiset identifier
self.identifier: str = identifier
self._version_id: str | None
self._version: Version | None
if version is None:
self._version_id = None
self._version = None
elif isinstance(version, str):
self._version_id = version
self._version = None
else:
self._version_id = version.identifier
self._version = version
self._data: RemoteDandisetData | None
if data is not None:
self._data = RemoteDandisetData.model_validate(data)
else:
self._data = None
def __str__(self) -> str:
return f"{self.client._instance_id}:{self.identifier}/{self.version_id}"
def _get_data(self) -> RemoteDandisetData:
if self._data is None:
try:
self._data = RemoteDandisetData.model_validate(
self.client.get(f"/dandisets/{self.identifier}/")
)
except HTTP404Error:
raise NotFoundError(f"No such Dandiset: {self.identifier}")
return self._data
@property
def version_id(self) -> str:
"""The identifier for the Dandiset version"""
if self._version_id is None:
self._version_id = self.version.identifier
return self._version_id
@property
def version(self) -> Version:
"""The version in question of the Dandiset"""
if self._version is None:
if self._version_id is None:
self._get_data()
if self._data is not None:
for v in [
self._data.most_recent_published_version,
self._data.draft_version,
]:
if v is not None and (
self._version_id is None or v.identifier == self.version_id
):
self._version = v
self._version_id = v.identifier
return v
assert self._version_id is not None
self._version = self.get_version(self._version_id)
return self._version
@property
def created(self) -> datetime:
"""The timestamp at which the Dandiset was created"""
return self._get_data().created
@property
def modified(self) -> datetime:
"""The timestamp at which the Dandiset was last modified"""
return self._get_data().modified
@property
def contact_person(self) -> str:
"""The name of the registered contact person for the Dandiset"""
return self._get_data().contact_person
@property
def embargo_status(self) -> EmbargoStatus:
"""The current embargo status for the Dandiset"""
return self._get_data().embargo_status
@property
def most_recent_published_version(self) -> Version | None:
"""
The most recent published (non-draft) version of the Dandiset, or
`None` if no versions have been published
"""
return self._get_data().most_recent_published_version
@property
def draft_version(self) -> Version:
"""The draft version of the Dandiset"""
return self._get_data().draft_version
@property
def api_path(self) -> str:
"""