Skip to content

Commit 8f28e83

Browse files
committed
feat: provide helper func to find objs with schema key within data instance
Test for the helper func is included in this commit as well
1 parent 89e2cb7 commit 8f28e83

2 files changed

Lines changed: 177 additions & 1 deletion

File tree

dandischema/tests/test_utils.py

Lines changed: 150 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
from typing import Dict, List, Optional, Union
1+
from typing import Any, Dict, List, Optional, Union
22

33
import pytest
44

55
from ..utils import (
66
_ensure_newline,
7+
find_objs,
78
name2title,
89
sanitize_value,
910
strip_top_level_optional,
@@ -88,3 +89,151 @@ def test_sanitize_value() -> None:
8889
assert sanitize_value("A;B") == "A-B"
8990
assert sanitize_value("A\\/B") == "A--B"
9091
assert sanitize_value("A\"'B") == "A--B"
92+
93+
94+
@pytest.mark.parametrize(
    "instance, schema_key, expected",
    [
        # --- Top-level dictionaries -------------------------------------------
        # The instance itself carries the requested schema key.
        pytest.param(
            {"schemaKey": "Test", "data": 123},
            "Test",
            [{"schemaKey": "Test", "data": 123}],
            id="single-match",
        ),
        # The instance carries a different schema key.
        pytest.param(
            {"schemaKey": "NotMatch", "data": 123},
            "Test",
            [],
            id="no-match",
        ),
        # --- Empty containers -------------------------------------------------
        # Nothing to inspect in an empty dictionary.
        pytest.param(
            {},
            "Test",
            [],
            id="empty-dict",
        ),
        # Nothing to inspect in an empty list.
        pytest.param(
            [],
            "Test",
            [],
            id="empty-list",
        ),
        # --- Nesting ----------------------------------------------------------
        # The match sits one level down, as the value of a dictionary field.
        pytest.param(
            {"level1": {"schemaKey": "Test", "info": "nested"}},
            "Test",
            [{"schemaKey": "Test", "info": "nested"}],
            id="nested-dict",
        ),
        # A list at the top level: non-matching elements are filtered out.
        pytest.param(
            [
                {"schemaKey": "Test", "data": 1},
                {"schemaKey": "Test", "data": 2},
                {"schemaKey": "NotTest", "data": 3},
            ],
            "Test",
            [
                {"schemaKey": "Test", "data": 1},
                {"schemaKey": "Test", "data": 2},
            ],
            id="list-of-dicts",
        ),
        # Dictionaries and lists nested inside each other.
        pytest.param(
            {
                "a": {"schemaKey": "Test", "value": 1},
                "b": [
                    {"schemaKey": "NotTest", "value": 2},
                    {"schemaKey": "Test", "value": 3},
                ],
                "c": "irrelevant",
                "d": [{"e": {"schemaKey": "Test", "value": 4}}],
            },
            "Test",
            [
                {"schemaKey": "Test", "value": 1},
                {"schemaKey": "Test", "value": 3},
                {"schemaKey": "Test", "value": 4},
            ],
            id="mixed-structure",
        ),
        # --- Scalars (nothing to traverse) ------------------------------------
        # An integer instance.
        pytest.param(
            42,
            "Test",
            [],
            id="non-collection-int",
        ),
        # A string instance.
        pytest.param(
            "some string",
            "Test",
            [],
            id="non-collection-string",
        ),
        # A float instance.
        pytest.param(
            3.14,
            "Test",
            [],
            id="non-collection-float",
        ),
        # A None instance.
        pytest.param(
            None,
            "Test",
            [],
            id="non-collection-None",
        ),
        # --- Matches inside matches -------------------------------------------
        # A matching object whose field holds another matching object: both are
        # reported, the parent before the child.
        pytest.param(
            {"schemaKey": "Test", "child": {"schemaKey": "Test", "data": "child"}},
            "Test",
            [
                {"schemaKey": "Test", "child": {"schemaKey": "Test", "data": "child"}},
                {"schemaKey": "Test", "data": "child"},
            ],
            id="nested-child",
        ),
        # A matching object whose field holds a list in which only some of the
        # elements match.
        pytest.param(
            {
                "schemaKey": "Test",
                "items": [
                    {"schemaKey": "Test", "data": "item1"},
                    {"schemaKey": "Other", "data": "item2"},
                    {"schemaKey": "Test", "data": "item3"},
                ],
            },
            "Test",
            [
                # First the enclosing object itself...
                {
                    "schemaKey": "Test",
                    "items": [
                        {"schemaKey": "Test", "data": "item1"},
                        {"schemaKey": "Other", "data": "item2"},
                        {"schemaKey": "Test", "data": "item3"},
                    ],
                },
                # ...then the matching elements of its list, in list order.
                {"schemaKey": "Test", "data": "item1"},
                {"schemaKey": "Test", "data": "item3"},
            ],
            id="list-in-field",
        ),
    ],
)
def test_find_objs_parametrized(
    instance: Any, schema_key: str, expected: list[dict]
) -> None:
    """Check that `find_objs` returns the expected objects in the expected order."""
    assert find_objs(instance, schema_key) == expected

dandischema/utils.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,3 +136,30 @@ def sanitize_value(value: str, field: str = "non-extension", sub: str = "-") ->
136136
if field != "extension":
137137
value = value.replace(".", sub)
138138
return value
139+
140+
141+
def find_objs(instance: Any, schema_key: str) -> list[dict]:
    """
    Find JSON objects, represented as dictionaries, that possess a specified schema key
    as the value of their `"schemaKey"` field, from a data instance.

    The data instance is walked depth-first: a dictionary is examined before its
    values, and the values of a dictionary / the items of a list are visited in
    their iteration order. Only dictionaries and lists are descended into; any
    other value is treated as a leaf. The walk uses an explicit stack rather than
    recursion, so arbitrarily deep nesting does not raise `RecursionError`.

    Note: the data instance is expected to be acyclic (as JSON data is); a
    container that contains itself would make the walk never terminate.

    :param instance: The data instance to fetch JSON objects from
    :param schema_key: The schema key
    :return: The list of JSON objects with the specified schema key in the data
        instance, in the order they are encountered. The list holds the original
        dictionary objects, not copies.
    """
    objs: list[dict] = []

    # Stack of values still to visit. Children are pushed in reverse so that they
    # are popped, and therefore visited, in their original order.
    pending: list[Any] = [instance]
    while pending:
        data = pending.pop()
        if isinstance(data, dict):
            if "schemaKey" in data and data["schemaKey"] == schema_key:
                objs.append(data)
            pending.extend(reversed(list(data.values())))
        elif isinstance(data, list):
            pending.extend(reversed(data))
        # Any other type is a leaf and cannot contain further objects.

    return objs

0 commit comments

Comments
 (0)