Skip to content

Commit bf092a2

Browse files
vinay553 and claude committed
Add dataset tags to SDK for identification (DE-7033)
Expose dataset tags through the Python SDK so customers can identify datasets labeled by Scale vs other vendors via the API.

- Add `tags` field to DatasetInfo model (returned by dataset.info())
- Add get_tags(), add_tags(), remove_tags() methods to Dataset class
- Use POST /tags/remove instead of DELETE to avoid proxy body-stripping
- Use pydantic v1/v2 compat shim for null-coercion validator
- Guard against passing a bare string instead of a list

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 400dfd8 commit bf092a2

File tree

2 files changed

+58
-1
lines changed

2 files changed

+58
-1
lines changed

nucleus/data_transfer_object/dataset_info.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,12 @@
1-
from typing import Any, Dict, List, Optional
1+
from typing import TYPE_CHECKING, Any, Dict, List, Optional
2+
3+
if TYPE_CHECKING:
4+
from pydantic.v1 import validator
5+
else:
6+
try:
7+
from pydantic.v1 import validator
8+
except ImportError:
9+
from pydantic import validator
210

311
from nucleus.pydantic_base import DictCompatibleModel
412

@@ -14,6 +22,7 @@ class DatasetInfo(DictCompatibleModel):
1422
slice_ids: List :class:`Slice` IDs associated with the :class:`Dataset`
1523
annotation_metadata_schema: Dict defining annotation-level metadata schema.
1624
item_metadata_schema: Dict defining item metadata schema.
25+
tags: List of tags associated with the :class:`Dataset`.
1726
"""
1827

1928
dataset_id: str
@@ -24,3 +33,8 @@ class DatasetInfo(DictCompatibleModel):
2433
# TODO: Expand the following into pydantic models to formalize schema
2534
annotation_metadata_schema: Optional[Dict[str, Any]] = None
2635
item_metadata_schema: Optional[Dict[str, Any]] = None
36+
tags: List[str] = []
37+
38+
@validator("tags", pre=True, always=True)  # pylint: disable=used-before-assignment
def coerce_null_tags(cls, v):  # pylint: disable=no-self-argument
    """Replace a ``None`` value for ``tags`` with an empty list.

    Registered with ``pre=True`` and ``always=True``; any value other than
    ``None`` is returned unchanged.
    """
    return [] if v is None else v

nucleus/dataset.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,49 @@ def info(self) -> DatasetInfo:
432432
dataset_info = DatasetInfo.parse_obj(response)
433433
return dataset_info
434434

435+
def get_tags(self) -> List[str]:
    """Retrieve the tags currently attached to this dataset.

    Sends a GET request with an empty payload to ``dataset/{id}/tags``
    through the client.

    Returns:
        The value stored under the ``"tags"`` key of the response.
    """
    route = f"dataset/{self.id}/tags"
    payload = self._client.make_request({}, route, requests.get)
    return payload["tags"]
445+
446+
def add_tags(self, tags: List[str]) -> List[str]:
    """Attach one or more tags to this dataset.

    Args:
        tags: List of tag strings to add.

    Returns:
        The value stored under the ``"tags"`` key of the response.

    Raises:
        TypeError: If ``tags`` is a single string rather than a list.
    """
    # A bare string is rejected up front instead of being sent as the payload.
    if isinstance(tags, str):
        raise TypeError("tags must be a list of strings, not a single string")

    route = f"dataset/{self.id}/tags"
    result = self._client.make_request({"tags": tags}, route, requests.post)
    return result["tags"]
461+
462+
def remove_tags(self, tags: List[str]) -> List[str]:
    """Removes tags from the dataset.

    The tags to remove are sent as a JSON body via POST to
    ``dataset/{id}/tags/remove``. A DELETE request is deliberately not
    used: intermediaries may strip the body of a DELETE request, and the
    list of tags to remove would then be lost.

    Args:
        tags: List of tag strings to remove.

    Returns:
        The value stored under the ``"tags"`` key of the response.

    Raises:
        TypeError: If ``tags`` is a single string rather than a list.
    """
    # A bare string is rejected up front instead of being sent as the payload.
    if isinstance(tags, str):
        raise TypeError("tags must be a list of strings, not a single string")
    response = self._client.make_request(
        {"tags": tags}, f"dataset/{self.id}/tags/remove", requests.post
    )
    return response["tags"]
477+
435478
@deprecated(
436479
"Model runs have been deprecated and will be removed. Use a Model instead"
437480
)

0 commit comments

Comments
 (0)