From 39d8a730c74890cbaecba5bd7ef53fbb9ee5fac3 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Thu, 27 Feb 2025 13:57:10 +0100 Subject: [PATCH 01/44] added support of slicing for IDSStructArray --- imas/ids_struct_array.py | 26 ++++++++++++++++++++------ imas/test/test_ids_struct_array.py | 12 ++++++++++++ 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/imas/ids_struct_array.py b/imas/ids_struct_array.py index b176864..38e8165 100644 --- a/imas/ids_struct_array.py +++ b/imas/ids_struct_array.py @@ -4,6 +4,7 @@ """ import logging +import sys from copy import deepcopy from typing import Optional, Tuple @@ -121,12 +122,25 @@ def _element_structure(self): return struct def __getitem__(self, item): - # value is a list, so the given item should be convertable to integer - # TODO: perhaps we should allow slices as well? - list_idx = int(item) - if self._lazy: - self._load(item) - return self.value[list_idx] + # allow slices + if isinstance(item, slice): + if self._lazy: + start, stop, step = item.start, item.stop, item.step + if stop is None: + stop = sys.maxsize + + for i in range(start or 0, stop, step or 1): + try: + self._load(i) + except IndexError: + break + return self.value[item] + else: + # value is a list, so the given item should be convertable to integer + list_idx = int(item) + if self._lazy: + self._load(item) + return self.value[list_idx] def __setitem__(self, item, value): # value is a list, so the given item should be convertable to integer diff --git a/imas/test/test_ids_struct_array.py b/imas/test/test_ids_struct_array.py index ab128df..8c31f22 100644 --- a/imas/test/test_ids_struct_array.py +++ b/imas/test/test_ids_struct_array.py @@ -87,3 +87,15 @@ def test_struct_array_eq(): assert cp1.profiles_1d != cp2.profiles_1d cp2.profiles_1d[0].time = 1 assert cp1.profiles_1d == cp2.profiles_1d + + +def test_struct_array_slice(): + cp1 = IDSFactory("3.39.0").core_profiles() + cp1.profiles_1d.resize(20) + + assert len(cp1.profiles_1d) == 20 + assert len(cp1.profiles_1d[:]) == 20 + assert len(cp1.profiles_1d[5:10]) == 5 + assert len(cp1.profiles_1d[10:]) == 10 + assert len(cp1.profiles_1d[:5]) == 5 + assert len(cp1.profiles_1d[::2]) == 10 From 67043752ee57dd3cbc7fea5ac213849802723b2e Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Mon, 17 Mar 2025 14:00:34 +0100 Subject: [PATCH 02/44] removed pull_request event --- .github/workflows/test_with_pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_with_pytest.yml b/.github/workflows/test_with_pytest.yml index 4febc7a..e537f5b 100644 --- a/.github/workflows/test_with_pytest.yml +++ b/.github/workflows/test_with_pytest.yml @@ -1,6 +1,6 @@ name: Test using pytest -on: [push, pull_request] +on: push jobs: test: From 7119a18b5e9571148b16b9fc78c845b5e9aa9dc6 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Thu, 6 Nov 2025 16:27:58 +0100 Subject: [PATCH 03/44] added ids_slice.py and updated IDSStructArray to return IDSSlice object and added flatten and values functions --- imas/ids_slice.py | 281 +++++++++++++++++++++ imas/ids_struct_array.py | 57 ++++- imas/test/test_ids_slice.py | 477 ++++++++++++++++++++++++++++++++++++ 3 files changed, 809 insertions(+), 6 deletions(-) create mode 100644 imas/ids_slice.py create mode 100644 imas/test/test_ids_slice.py diff --git a/imas/ids_slice.py b/imas/ids_slice.py new file mode 100644 index 0000000..b561c23 --- /dev/null +++ b/imas/ids_slice.py @@ -0,0 +1,281 @@ +# This file is part of IMAS-Python. +# You should have received the IMAS-Python LICENSE file with this project. +"""IDSSlice represents a collection of IDS nodes matching a slice expression. + +This module provides the IDSSlice class, which enables slicing of arrays of +structures while maintaining the hierarchy and allowing further operations on +the resulting collection. +""" + +import logging +from typing import Any, Iterator, List, Union + + +from imas.ids_base import IDSBase + +logger = logging.getLogger(__name__) + + +class IDSSlice(IDSBase): + """Represents a slice of IDS struct array elements. + + When slicing an IDSStructArray, instead of returning a regular Python list, + an IDSSlice is returned. This allows for: + - Tracking the slice operation in the path + - Further slicing of child elements + - Attribute access on all matched elements + - Iteration over matched elements + """ + + __slots__ = ["_parent", "metadata", "_matched_elements", "_slice_path", "_lazy"] + + def __init__( + self, + parent: IDSBase, + metadata: Any, + matched_elements: List[IDSBase], + slice_path: str, + ): + """Initialize IDSSlice. + + Args: + parent: The parent IDSStructArray that was sliced + metadata: Metadata from the parent array + matched_elements: List of elements that matched the slice + slice_path: String representation of the slice operation (e.g., "[8:]") + """ + self._parent = parent + self.metadata = metadata + self._matched_elements = matched_elements + self._slice_path = slice_path + self._lazy = parent._lazy + + @property + def _toplevel(self): + """Return the toplevel instance this node belongs to""" + return self._parent._toplevel + + @property + def _path(self) -> str: + """Build the path to this slice. + + The path includes the parent's path plus the slice operation. + """ + return self._parent._path + self._slice_path + + def __len__(self) -> int: + """Return the number of elements matched by this slice.""" + return len(self._matched_elements) + + def __iter__(self) -> Iterator[IDSBase]: + """Iterate over all matched elements.""" + return iter(self._matched_elements) + + def __getitem__(self, item: Union[int, slice]) -> Union[IDSBase, "IDSSlice"]: + """Get element(s) from the slice. + + Args: + item: Index or slice to apply to the matched elements + + Returns: + A single element if item is an int, or an IDSSlice if item is a slice + + Raises: + IndexError: If the index is out of range + AttributeError: If trying to index into elements that aren't indexable + """ + if isinstance(item, slice): + # Further slice the matched elements + sliced_elements = self._matched_elements[item] + if not isinstance(sliced_elements, list): + sliced_elements = [sliced_elements] + + # Build the slice path representation + slice_str = self._format_slice(item) + new_path = self._slice_path + slice_str + + return IDSSlice( + self._parent, + self.metadata, + sliced_elements, + new_path, + ) + else: + # Return a single element by index + return self._matched_elements[int(item)] + + def __getattr__(self, name: str) -> "IDSSlice": + """Access a child attribute on all matched elements. + + This returns a new IDSSlice containing the child attribute from + each matched element. + + Args: + name: Name of the attribute to access + + Returns: + A new IDSSlice containing the child attribute from each matched element + + Raises: + AttributeError: If the attribute doesn't exist + """ + # Avoid issues with special attributes + if name.startswith("_"): + raise AttributeError(f"IDSSlice has no attribute '{name}'") + + # Access the attribute on each element + child_elements = [] + + for element in self._matched_elements: + child = getattr(element, name) + child_elements.append(child) + + # Build the new path including the attribute access + new_path = self._slice_path + "." + name + + return IDSSlice( + self._parent, + None, # metadata is not directly applicable to the child + child_elements, + new_path, + ) + + def __repr__(self) -> str: + """Build a string representation of this slice.""" + toplevel_name = self._toplevel.metadata.name + matches_count = len(self._matched_elements) + match_word = "match" if matches_count == 1 else "matches" + return ( + f"" + ) + + def _build_repr_start(self) -> str: + """Build the start of the string representation. + + This is used for consistency with other IDS node types. + """ + return ( + f"<{type(self).__name__} (IDS:{self._toplevel.metadata.name}, {self._path}" + ) + + def values(self) -> List[Any]: + """Extract raw values from elements in this slice. + + For IDSPrimitive elements, this extracts the wrapped value. + For other element types, returns them as-is. + + This is useful for getting the actual data without the IDS wrapper + when accessing scalar fields through a slice, without requiring + explicit looping through the original collection. + + Returns: + List of raw Python/numpy values or other unwrapped elements + + Examples: + >>> # Get names from identifiers without looping + >>> n = edge_profiles.grid_ggd[0].grid_subset[:].identifier.name.values() + >>> # Result: ["nodes", "edges", "cells"] + >>> + >>> # Works with any scalar or array type + >>> i = edge_profiles.grid_ggd[0].grid_subset[:].identifier.index.values() + >>> # Result: [1, 2, 5] + >>> + >>> # Still works with structures (returns unwrapped) + >>> ions = profiles[:].ion.values() + >>> # Result: [IDSStructure(...), IDSStructure(...), ...] + """ + from imas.ids_primitive import IDSPrimitive + + result = [] + for element in self._matched_elements: + if isinstance(element, IDSPrimitive): + # Extract the wrapped value from IDSPrimitive + result.append(element.value) + else: + # Return other types as-is (structures, arrays, etc.) + result.append(element) + return result + + def flatten(self, recursive: bool = False) -> "IDSSlice": + """Flatten nested arrays into a single IDSSlice. + + This method is useful for MATLAB-style matrix-like access. + It flattens matched elements that are themselves iterable + (such as IDSStructArray) into a single flat IDSSlice. + + Args: + recursive: If True, recursively flatten nested IDSSlices. + If False (default), only flatten one level. + + Returns: + New IDSSlice with flattened elements + + Examples: + >>> # Get all ions from 2 profiles as a flat list + >>> all_ions = cp.profiles_1d[:2].ion.flatten() + >>> len(all_ions) # Number of total ions + 10 + >>> # Iterate over all ions + >>> for ion in all_ions: + ... print(ion.label) + + >>> # Flatten recursively for deeply nested structures + >>> deeply_nested = obj.level1[:].level2[:].flatten(recursive=True) + """ + from imas.ids_struct_array import IDSStructArray + + flattened = [] + + for element in self._matched_elements: + if isinstance(element, IDSStructArray): + # Flatten IDSStructArray elements + flattened.extend(list(element)) + elif recursive and isinstance(element, IDSSlice): + # Recursively flatten nested IDSSlices + flattened.extend(list(element.flatten(recursive=True))) + else: + # Keep non-array elements as-is + flattened.append(element) + + new_path = self._slice_path + ".flatten()" + return IDSSlice( + self._parent, + None, + flattened, + new_path, + ) + + @staticmethod + def _format_slice(slice_obj: slice) -> str: + """Format a slice object as a string. + + Args: + slice_obj: The slice object to format + + Returns: + String representation like "[1:5]", "[::2]", etc. + """ + start = slice_obj.start if slice_obj.start is not None else "" + stop = slice_obj.stop if slice_obj.stop is not None else "" + step = slice_obj.step if slice_obj.step is not None else "" + + if step: + return f"[{start}:{stop}:{step}]" + else: + return f"[{start}:{stop}]" + + def _validate(self) -> None: + """Validate all matched elements.""" + for element in self._matched_elements: + element._validate() + + def _xxhash(self) -> bytes: + """Compute hash of all matched elements.""" + from xxhash import xxh3_64 + + hsh = xxh3_64(len(self._matched_elements).to_bytes(8, "little")) + for element in self._matched_elements: + hsh.update(element._xxhash()) + return hsh.digest() diff --git a/imas/ids_struct_array.py b/imas/ids_struct_array.py index b176864..56e0706 100644 --- a/imas/ids_struct_array.py +++ b/imas/ids_struct_array.py @@ -120,13 +120,58 @@ def _element_structure(self): struct = IDSStructure(self, self.metadata) return struct + @staticmethod + def _format_slice(slice_obj: slice) -> str: + """Format a slice object as a string. + + Args: + slice_obj: The slice object to format + + Returns: + String representation like "[1:5]", "[::2]", etc. + """ + start = slice_obj.start if slice_obj.start is not None else "" + stop = slice_obj.stop if slice_obj.stop is not None else "" + step = slice_obj.step if slice_obj.step is not None else "" + + if step: + return f"[{start}:{stop}:{step}]" + else: + return f"[{start}:{stop}]" + def __getitem__(self, item): - # value is a list, so the given item should be convertable to integer - # TODO: perhaps we should allow slices as well? - list_idx = int(item) - if self._lazy: - self._load(item) - return self.value[list_idx] + """Get element(s) from the struct array. + + Args: + item: Integer index or slice object + + Returns: + A single IDSStructure if item is an int, or an IDSSlice if item is a slice + """ + if isinstance(item, slice): + # Handle slice by returning an IDSSlice + from imas.ids_slice import IDSSlice + + # Get the matched elements + matched_elements = self.value[item] + if not isinstance(matched_elements, list): + matched_elements = [matched_elements] + + # Build the slice path representation + slice_str = self._format_slice(item) + + return IDSSlice( + self, + self.metadata, + matched_elements, + slice_str, + ) + else: + # Handle integer index + list_idx = int(item) + if self._lazy: + self._load(item) + return self.value[list_idx] def __setitem__(self, item, value): # value is a list, so the given item should be convertable to integer diff --git a/imas/test/test_ids_slice.py b/imas/test/test_ids_slice.py new file mode 100644 index 0000000..fb268c4 --- /dev/null +++ b/imas/test/test_ids_slice.py @@ -0,0 +1,477 @@ +# This file is part of IMAS-Python. +# You should have received the IMAS-Python LICENSE file with this project. + +import numpy as np +import pytest + +from imas.ids_factory import IDSFactory +from imas.ids_slice import IDSSlice + + +@pytest.fixture +def wall_with_units(): + return create_wall_with_units() + + +@pytest.fixture +def wall_varying_sizes(): + return create_wall_with_units(total_units=2, element_counts=[4, 2]) + + +def create_wall_with_units( + total_units: int = 12, + element_counts=None, + *, + dd_version: str = "3.39.0", +): + + if total_units < 2: + raise ValueError("Need at least two units to exercise slice edge cases.") + + wall = IDSFactory(dd_version).wall() + wall.description_2d.resize(1) + + units = wall.description_2d[0].vessel.unit + units.resize(total_units) + + if element_counts is None: + # Ensure unit index 1 has fewer elements than unit 0 to trigger the corner case. + element_counts = [4, 2] + [3] * (total_units - 2) + + element_counts = list(element_counts) + if len(element_counts) != total_units: + raise ValueError("element_counts length must match total_units.") + + for unit_idx, unit in enumerate(units): + unit.name = f"unit-{unit_idx}" + unit.element.resize(element_counts[unit_idx]) + for elem_idx, element in enumerate(unit.element): + element.name = f"element-{unit_idx}-{elem_idx}" + + return wall + + +def safe_element_lookup(units_slice, element_index: int): + collected = [] + skipped_units = [] + for idx, unit in enumerate(units_slice): + elements = unit.element + if element_index >= len(elements): + skipped_units.append(idx) + continue + collected.append(elements[element_index].name.value) + return {"collected": collected, "skipped_units": skipped_units} + + +class TestBasicSlicing: + + def test_slice_with_start_and_stop(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(10) + + result = cp.profiles_1d[3:7] + assert isinstance(result, IDSSlice) + assert len(result) == 4 + + result = cp.profiles_1d[::2] + assert isinstance(result, IDSSlice) + assert len(result) == 5 + + result = cp.profiles_1d[-5:] + assert isinstance(result, IDSSlice) + assert len(result) == 5 + + def test_slice_corner_cases(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(10) + + result = cp.profiles_1d[0:100] + assert len(result) == 10 + + result = cp.profiles_1d[10:20] + assert len(result) == 0 + + result = cp.profiles_1d[::-1] + assert len(result) == 10 + + def test_integer_index_still_works(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(10) + + result = cp.profiles_1d[5] + assert not isinstance(result, IDSSlice) + assert hasattr(result, "_path") + + +class TestIDSSlicePath: + + def test_slice_path_representation(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(10) + + result = cp.profiles_1d[5:8] + expected_path = "profiles_1d[5:8]" + assert expected_path in result._path + + result = cp.profiles_1d[5:8][1:3] + assert "[" in result._path + + def test_attribute_access_path(self, wall_with_units): + wall = wall_with_units + units = wall.description_2d[0].vessel.unit[8:] + + element_slice = units.element + assert "element" in element_slice._path + + +class TestIDSSliceIteration: + + def test_iteration_and_len(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(5) + + slice_obj = cp.profiles_1d[1:4] + + items = list(slice_obj) + assert len(items) == 3 + + assert len(slice_obj) == 3 + + +class TestIDSSliceIndexing: + + def test_integer_indexing_slice(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(10) + + slice_obj = cp.profiles_1d[3:7] + element = slice_obj[1] + assert not isinstance(element, IDSSlice) + + def test_slice_indexing_slice(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(10) + + slice_obj = cp.profiles_1d[2:8] + nested_slice = slice_obj[1:4] + assert isinstance(nested_slice, IDSSlice) + assert len(nested_slice) == 3 + + +class TestIDSSliceAttributeAccess: + + def test_attribute_access_nested_attributes(self, wall_with_units): + wall = wall_with_units + units = wall.description_2d[0].vessel.unit[8:] + + names = units.name + assert isinstance(names, IDSSlice) + assert len(names) == 4 + + units_full = wall.description_2d[0].vessel.unit + elements = units_full[:].element + assert isinstance(elements, IDSSlice) + + +class TestIDSSliceRepr: + + def test_repr_count_display(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(10) + + slice_obj = cp.profiles_1d[5:6] + repr_str = repr(slice_obj) + assert "IDSSlice" in repr_str + assert "1 match" in repr_str + + slice_obj = cp.profiles_1d[5:8] + repr_str = repr(slice_obj) + assert "IDSSlice" in repr_str + assert "3 matches" in repr_str + + +class TestIDSSliceValidation: + + def test_validate_slice(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(2) + cp.ids_properties.homogeneous_time = 1 + + slice_obj = cp.profiles_1d[:] + assert isinstance(slice_obj, IDSSlice) + + +class TestIDSSliceHash: + + def test_xxhash(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(5) + + slice_obj = cp.profiles_1d[:] + hash_bytes = slice_obj._xxhash() + assert isinstance(hash_bytes, bytes) + assert len(hash_bytes) > 0 + + +class TestWallExampleSlicing: + + def test_wall_units_nested_element_access(self, wall_with_units): + wall = wall_with_units + units = wall.description_2d[0].vessel.unit + + units_slice = units[8:] + assert isinstance(units_slice, IDSSlice) + assert len(units_slice) == 4 + + elements_slice = units_slice.element + assert isinstance(elements_slice, IDSSlice) + + +class TestEdgeCases: + + def test_slice_empty_array(self): + cp = IDSFactory("3.39.0").core_profiles() + + result = cp.profiles_1d[:] + assert isinstance(result, IDSSlice) + assert len(result) == 0 + + def test_slice_single_element(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(1) + + result = cp.profiles_1d[:] + assert isinstance(result, IDSSlice) + assert len(result) == 1 + + def test_invalid_step_zero(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(10) + + with pytest.raises(ValueError): + cp.profiles_1d[::0] + + +class TestFlatten: + + def test_flatten_basic_and_partial(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(3) + + for profile in cp.profiles_1d: + profile.ion.resize(5) + + slice_obj = cp.profiles_1d[:].ion + flattened = slice_obj.flatten() + assert isinstance(flattened, IDSSlice) + assert len(flattened) == 15 + + cp2 = IDSFactory("3.39.0").core_profiles() + cp2.profiles_1d.resize(4) + for profile in cp2.profiles_1d: + profile.ion.resize(3) + flattened2 = cp2.profiles_1d[:2].ion.flatten() + assert len(flattened2) == 6 + + def test_flatten_empty_and_single(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(2) + empty_flattened = cp.profiles_1d[:].ion.flatten() + assert len(empty_flattened) == 0 + + cp2 = IDSFactory("3.39.0").core_profiles() + cp2.profiles_1d.resize(1) + cp2.profiles_1d[0].ion.resize(4) + single_flattened = cp2.profiles_1d[:].ion.flatten() + assert len(single_flattened) == 4 + + def test_flatten_indexing_and_slicing(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(2) + + for i, profile in enumerate(cp.profiles_1d): + profile.ion.resize(3) + for j, ion in enumerate(profile.ion): + ion.label = f"ion_{i}_{j}" + + flattened = cp.profiles_1d[:].ion.flatten() + + assert flattened[0].label == "ion_0_0" + assert flattened[3].label == "ion_1_0" + + subset = flattened[1:4] + assert isinstance(subset, IDSSlice) + assert len(subset) == 3 + labels = [ion.label for ion in subset] + assert labels == ["ion_0_1", "ion_0_2", "ion_1_0"] + + def test_flatten_repr_and_path(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(2) + for profile in cp.profiles_1d: + profile.ion.resize(2) + + flattened = cp.profiles_1d[:].ion.flatten() + repr_str = repr(flattened) + + assert "IDSSlice" in repr_str + assert "4 matches" in repr_str + assert ".flatten()" in flattened._path + + def test_flatten_complex_case(self, wall_with_units): + wall = wall_with_units + units = wall.description_2d[0].vessel.unit[:5] + + all_elements = units.element.flatten() + assert len(all_elements) == 4 + 2 + 3 + 3 + 3 + + +class TestVaryingArraySizeIndexing: + + def test_unit_slice_element_integer_indexing(self, wall_varying_sizes): + units = wall_varying_sizes.description_2d[0].vessel.unit + units_slice = units[:2] + element_slice = units_slice.element + + with pytest.raises(IndexError): + element_slice[2] + + def test_unit_slice_element_safe_indexing_scenarios(self, wall_varying_sizes): + units = wall_varying_sizes.description_2d[0].vessel.unit + units_slice = units[:2] + + result = safe_element_lookup(units_slice, 1) + assert len(result["collected"]) == 2 + assert result["collected"] == ["element-0-1", "element-1-1"] + + result = safe_element_lookup(units_slice, 2) + assert len(result["collected"]) == 1 + assert result["skipped_units"] == [1] + + result = safe_element_lookup(units_slice, 4) + assert len(result["collected"]) == 0 + assert result["skipped_units"] == [0, 1] + + def test_unit_slice_element_individual_access(self, wall_varying_sizes): + units = wall_varying_sizes.description_2d[0].vessel.unit + element_slice = units[:2].element + + array_0 = element_slice[0] + assert len(array_0) == 4 + assert array_0[2].name.value == "element-0-2" + + array_1 = element_slice[1] + assert len(array_1) == 2 + + with pytest.raises(IndexError): + array_1[2] + + def test_wall_with_diverse_element_counts(self): + wall = create_wall_with_units(total_units=5, element_counts=[3, 1, 4, 2, 5]) + + units = wall.description_2d[0].vessel.unit + units_slice = units[:3] + element_slice = units_slice.element + + assert len(element_slice[0]) == 3 + assert len(element_slice[1]) == 1 + assert len(element_slice[2]) == 4 + + result = safe_element_lookup(units_slice, 2) + assert len(result["collected"]) == 2 + assert result["skipped_units"] == [1] + + +class TestIDSSliceValues: + + def test_values_basic_extraction(self, wall_with_units): + wall = wall_with_units + units = wall.description_2d[0].vessel.unit + + names_slice = units[:].name + names = names_slice.values() + + assert isinstance(names, list) + assert len(names) == 12 + assert all(isinstance(name, str) and name.startswith("unit-") for name in names) + assert names == [f"unit-{i}" for i in range(12)] + + def test_values_integer_and_float_extraction(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(3) + + for profile in cp.profiles_1d: + profile.ion.resize(2) + for i, ion in enumerate(profile.ion): + ion.neutral_index = i + ion.z_ion = float(i + 1) + + ions = cp.profiles_1d[:].ion.flatten() + indices = ions[:].neutral_index.values() + assert all(isinstance(idx, (int, np.integer)) for idx in indices) + + z_values = ions[:].z_ion.values() + assert all(isinstance(z, (float, np.floating)) for z in z_values) + + def test_values_partial_and_empty_slices(self, wall_with_units): + wall = wall_with_units + units = wall.description_2d[0].vessel.unit + + names = units[:5].name.values() + assert len(names) == 5 + assert names == [f"unit-{i}" for i in range(5)] + + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(5) + empty_values = cp.profiles_1d[5:10].label.values() + assert len(empty_values) == 0 + + def test_values_with_step_and_negative_indices(self, wall_with_units): + wall = wall_with_units + units = wall.description_2d[0].vessel.unit + + names_step = units[::2].name.values() + assert len(names_step) == 6 + assert names_step == [f"unit-{i}" for i in range(0, 12, 2)] + + names_neg = units[-3:].name.values() + assert len(names_neg) == 3 + assert names_neg == [f"unit-{i}" for i in range(9, 12)] + + def test_values_structure_preservation(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(3) + + for profile in cp.profiles_1d: + profile.ion.resize(2) + + ions = cp.profiles_1d[:].ion.flatten().values() + + assert len(ions) == 6 + for ion in ions: + assert hasattr(ion, "_path") + from imas.ids_primitive import IDSPrimitive + + assert not isinstance(ion, IDSPrimitive) + + def test_values_array_primitives(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(2) + + cp.profiles_1d[0].grid.psi = np.linspace(0, 1, 10) + cp.profiles_1d[1].grid.psi = np.linspace(1, 2, 10) + + psi_values = cp.profiles_1d[:].grid.psi.values() + + assert len(psi_values) == 2 + assert all(isinstance(psi, np.ndarray) for psi in psi_values) + + def test_values_consistency_with_iteration(self, wall_with_units): + wall = wall_with_units + units = wall.description_2d[0].vessel.unit + + names_via_values = units[:5].name.values() + + names_via_iteration = [unit.name.value for unit in units[:5]] + + assert names_via_values == names_via_iteration From 78727b45c5883ed6563c12c84542073ba9508d99 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Thu, 6 Nov 2025 16:27:58 +0100 Subject: [PATCH 04/44] added ids_slice.py and updated IDSStructArray to return IDSSlice object and added flatten and values functions --- imas/ids_slice.py | 281 +++++++++++++++++++++ imas/ids_struct_array.py | 58 ++++- imas/test/test_ids_slice.py | 477 ++++++++++++++++++++++++++++++++++++ 3 files changed, 803 insertions(+), 13 deletions(-) create mode 100644 imas/ids_slice.py create mode 100644 imas/test/test_ids_slice.py diff --git a/imas/ids_slice.py b/imas/ids_slice.py new file mode 100644 index 0000000..b561c23 --- /dev/null +++ b/imas/ids_slice.py @@ -0,0 +1,281 @@ +# This file is part of IMAS-Python. +# You should have received the IMAS-Python LICENSE file with this project. +"""IDSSlice represents a collection of IDS nodes matching a slice expression. + +This module provides the IDSSlice class, which enables slicing of arrays of +structures while maintaining the hierarchy and allowing further operations on +the resulting collection. +""" + +import logging +from typing import Any, Iterator, List, Union + + +from imas.ids_base import IDSBase + +logger = logging.getLogger(__name__) + + +class IDSSlice(IDSBase): + """Represents a slice of IDS struct array elements. + + When slicing an IDSStructArray, instead of returning a regular Python list, + an IDSSlice is returned. This allows for: + - Tracking the slice operation in the path + - Further slicing of child elements + - Attribute access on all matched elements + - Iteration over matched elements + """ + + __slots__ = ["_parent", "metadata", "_matched_elements", "_slice_path", "_lazy"] + + def __init__( + self, + parent: IDSBase, + metadata: Any, + matched_elements: List[IDSBase], + slice_path: str, + ): + """Initialize IDSSlice. + + Args: + parent: The parent IDSStructArray that was sliced + metadata: Metadata from the parent array + matched_elements: List of elements that matched the slice + slice_path: String representation of the slice operation (e.g., "[8:]") + """ + self._parent = parent + self.metadata = metadata + self._matched_elements = matched_elements + self._slice_path = slice_path + self._lazy = parent._lazy + + @property + def _toplevel(self): + """Return the toplevel instance this node belongs to""" + return self._parent._toplevel + + @property + def _path(self) -> str: + """Build the path to this slice. + + The path includes the parent's path plus the slice operation. + """ + return self._parent._path + self._slice_path + + def __len__(self) -> int: + """Return the number of elements matched by this slice.""" + return len(self._matched_elements) + + def __iter__(self) -> Iterator[IDSBase]: + """Iterate over all matched elements.""" + return iter(self._matched_elements) + + def __getitem__(self, item: Union[int, slice]) -> Union[IDSBase, "IDSSlice"]: + """Get element(s) from the slice. + + Args: + item: Index or slice to apply to the matched elements + + Returns: + A single element if item is an int, or an IDSSlice if item is a slice + + Raises: + IndexError: If the index is out of range + AttributeError: If trying to index into elements that aren't indexable + """ + if isinstance(item, slice): + # Further slice the matched elements + sliced_elements = self._matched_elements[item] + if not isinstance(sliced_elements, list): + sliced_elements = [sliced_elements] + + # Build the slice path representation + slice_str = self._format_slice(item) + new_path = self._slice_path + slice_str + + return IDSSlice( + self._parent, + self.metadata, + sliced_elements, + new_path, + ) + else: + # Return a single element by index + return self._matched_elements[int(item)] + + def __getattr__(self, name: str) -> "IDSSlice": + """Access a child attribute on all matched elements. + + This returns a new IDSSlice containing the child attribute from + each matched element. + + Args: + name: Name of the attribute to access + + Returns: + A new IDSSlice containing the child attribute from each matched element + + Raises: + AttributeError: If the attribute doesn't exist + """ + # Avoid issues with special attributes + if name.startswith("_"): + raise AttributeError(f"IDSSlice has no attribute '{name}'") + + # Access the attribute on each element + child_elements = [] + + for element in self._matched_elements: + child = getattr(element, name) + child_elements.append(child) + + # Build the new path including the attribute access + new_path = self._slice_path + "." + name + + return IDSSlice( + self._parent, + None, # metadata is not directly applicable to the child + child_elements, + new_path, + ) + + def __repr__(self) -> str: + """Build a string representation of this slice.""" + toplevel_name = self._toplevel.metadata.name + matches_count = len(self._matched_elements) + match_word = "match" if matches_count == 1 else "matches" + return ( + f"" + ) + + def _build_repr_start(self) -> str: + """Build the start of the string representation. + + This is used for consistency with other IDS node types. + """ + return ( + f"<{type(self).__name__} (IDS:{self._toplevel.metadata.name}, {self._path}" + ) + + def values(self) -> List[Any]: + """Extract raw values from elements in this slice. + + For IDSPrimitive elements, this extracts the wrapped value. + For other element types, returns them as-is. + + This is useful for getting the actual data without the IDS wrapper + when accessing scalar fields through a slice, without requiring + explicit looping through the original collection. + + Returns: + List of raw Python/numpy values or other unwrapped elements + + Examples: + >>> # Get names from identifiers without looping + >>> n = edge_profiles.grid_ggd[0].grid_subset[:].identifier.name.values() + >>> # Result: ["nodes", "edges", "cells"] + >>> + >>> # Works with any scalar or array type + >>> i = edge_profiles.grid_ggd[0].grid_subset[:].identifier.index.values() + >>> # Result: [1, 2, 5] + >>> + >>> # Still works with structures (returns unwrapped) + >>> ions = profiles[:].ion.values() + >>> # Result: [IDSStructure(...), IDSStructure(...), ...] + """ + from imas.ids_primitive import IDSPrimitive + + result = [] + for element in self._matched_elements: + if isinstance(element, IDSPrimitive): + # Extract the wrapped value from IDSPrimitive + result.append(element.value) + else: + # Return other types as-is (structures, arrays, etc.) + result.append(element) + return result + + def flatten(self, recursive: bool = False) -> "IDSSlice": + """Flatten nested arrays into a single IDSSlice. + + This method is useful for MATLAB-style matrix-like access. + It flattens matched elements that are themselves iterable + (such as IDSStructArray) into a single flat IDSSlice. + + Args: + recursive: If True, recursively flatten nested IDSSlices. + If False (default), only flatten one level. + + Returns: + New IDSSlice with flattened elements + + Examples: + >>> # Get all ions from 2 profiles as a flat list + >>> all_ions = cp.profiles_1d[:2].ion.flatten() + >>> len(all_ions) # Number of total ions + 10 + >>> # Iterate over all ions + >>> for ion in all_ions: + ... print(ion.label) + + >>> # Flatten recursively for deeply nested structures + >>> deeply_nested = obj.level1[:].level2[:].flatten(recursive=True) + """ + from imas.ids_struct_array import IDSStructArray + + flattened = [] + + for element in self._matched_elements: + if isinstance(element, IDSStructArray): + # Flatten IDSStructArray elements + flattened.extend(list(element)) + elif recursive and isinstance(element, IDSSlice): + # Recursively flatten nested IDSSlices + flattened.extend(list(element.flatten(recursive=True))) + else: + # Keep non-array elements as-is + flattened.append(element) + + new_path = self._slice_path + ".flatten()" + return IDSSlice( + self._parent, + None, + flattened, + new_path, + ) + + @staticmethod + def _format_slice(slice_obj: slice) -> str: + """Format a slice object as a string. + + Args: + slice_obj: The slice object to format + + Returns: + String representation like "[1:5]", "[::2]", etc. + """ + start = slice_obj.start if slice_obj.start is not None else "" + stop = slice_obj.stop if slice_obj.stop is not None else "" + step = slice_obj.step if slice_obj.step is not None else "" + + if step: + return f"[{start}:{stop}:{step}]" + else: + return f"[{start}:{stop}]" + + def _validate(self) -> None: + """Validate all matched elements.""" + for element in self._matched_elements: + element._validate() + + def _xxhash(self) -> bytes: + """Compute hash of all matched elements.""" + from xxhash import xxh3_64 + + hsh = xxh3_64(len(self._matched_elements).to_bytes(8, "little")) + for element in self._matched_elements: + hsh.update(element._xxhash()) + return hsh.digest() diff --git a/imas/ids_struct_array.py b/imas/ids_struct_array.py index 38e8165..8495bb3 100644 --- a/imas/ids_struct_array.py +++ b/imas/ids_struct_array.py @@ -121,22 +121,54 @@ def _element_structure(self): struct = IDSStructure(self, self.metadata) return struct + @staticmethod + def _format_slice(slice_obj: slice) -> str: + """Format a slice object as a string. + + Args: + slice_obj: The slice object to format + + Returns: + String representation like "[1:5]", "[::2]", etc. + """ + start = slice_obj.start if slice_obj.start is not None else "" + stop = slice_obj.stop if slice_obj.stop is not None else "" + step = slice_obj.step if slice_obj.step is not None else "" + + if step: + return f"[{start}:{stop}:{step}]" + else: + return f"[{start}:{stop}]" + def __getitem__(self, item): - # allow slices + """Get element(s) from the struct array. + + Args: + item: Integer index or slice object + + Returns: + A single IDSStructure if item is an int, or an IDSSlice if item is a slice + """ if isinstance(item, slice): - if self._lazy: - start, stop, step = item.start, item.stop, item.step - if stop is None: - stop = sys.maxsize - - for i in range(start or 0, stop, step or 1): - try: - self._load(i) - except IndexError: - break - return self.value[item] + # Handle slice by returning an IDSSlice + from imas.ids_slice import IDSSlice + + # Get the matched elements + matched_elements = self.value[item] + if not isinstance(matched_elements, list): + matched_elements = [matched_elements] + + # Build the slice path representation + slice_str = self._format_slice(item) + + return IDSSlice( + self, + self.metadata, + matched_elements, + slice_str, + ) else: - # value is a list, so the given item should be convertable to integer + # Handle integer index list_idx = int(item) if self._lazy: self._load(item) diff --git a/imas/test/test_ids_slice.py b/imas/test/test_ids_slice.py new file mode 100644 index 0000000..fb268c4 --- /dev/null +++ b/imas/test/test_ids_slice.py @@ -0,0 +1,477 @@ +# This file is part of IMAS-Python. +# You should have received the IMAS-Python LICENSE file with this project. + +import numpy as np +import pytest + +from imas.ids_factory import IDSFactory +from imas.ids_slice import IDSSlice + + +@pytest.fixture +def wall_with_units(): + return create_wall_with_units() + + +@pytest.fixture +def wall_varying_sizes(): + return create_wall_with_units(total_units=2, element_counts=[4, 2]) + + +def create_wall_with_units( + total_units: int = 12, + element_counts=None, + *, + dd_version: str = "3.39.0", +): + + if total_units < 2: + raise ValueError("Need at least two units to exercise slice edge cases.") + + wall = IDSFactory(dd_version).wall() + wall.description_2d.resize(1) + + units = wall.description_2d[0].vessel.unit + units.resize(total_units) + + if element_counts is None: + # Ensure unit index 1 has fewer elements than unit 0 to trigger the corner case. + element_counts = [4, 2] + [3] * (total_units - 2) + + element_counts = list(element_counts) + if len(element_counts) != total_units: + raise ValueError("element_counts length must match total_units.") + + for unit_idx, unit in enumerate(units): + unit.name = f"unit-{unit_idx}" + unit.element.resize(element_counts[unit_idx]) + for elem_idx, element in enumerate(unit.element): + element.name = f"element-{unit_idx}-{elem_idx}" + + return wall + + +def safe_element_lookup(units_slice, element_index: int): + collected = [] + skipped_units = [] + for idx, unit in enumerate(units_slice): + elements = unit.element + if element_index >= len(elements): + skipped_units.append(idx) + continue + collected.append(elements[element_index].name.value) + return {"collected": collected, "skipped_units": skipped_units} + + +class TestBasicSlicing: + + def test_slice_with_start_and_stop(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(10) + + result = cp.profiles_1d[3:7] + assert isinstance(result, IDSSlice) + assert len(result) == 4 + + result = cp.profiles_1d[::2] + assert isinstance(result, IDSSlice) + assert len(result) == 5 + + result = cp.profiles_1d[-5:] + assert isinstance(result, IDSSlice) + assert len(result) == 5 + + def test_slice_corner_cases(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(10) + + result = cp.profiles_1d[0:100] + assert len(result) == 10 + + result = cp.profiles_1d[10:20] + assert len(result) == 0 + + result = cp.profiles_1d[::-1] + assert len(result) == 10 + + def test_integer_index_still_works(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(10) + + result = cp.profiles_1d[5] + assert not isinstance(result, IDSSlice) + assert hasattr(result, "_path") + + +class TestIDSSlicePath: + + def test_slice_path_representation(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(10) + + result = cp.profiles_1d[5:8] + expected_path = "profiles_1d[5:8]" + assert expected_path in result._path + + result = cp.profiles_1d[5:8][1:3] + assert "[" in result._path + + def test_attribute_access_path(self, wall_with_units): + wall = wall_with_units + units = wall.description_2d[0].vessel.unit[8:] + + element_slice = units.element + assert "element" in element_slice._path + + +class TestIDSSliceIteration: + + def test_iteration_and_len(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(5) + + slice_obj = cp.profiles_1d[1:4] + + items = list(slice_obj) + assert len(items) == 3 + + assert len(slice_obj) == 3 + + +class TestIDSSliceIndexing: + + def test_integer_indexing_slice(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(10) + + slice_obj = cp.profiles_1d[3:7] + element = slice_obj[1] + assert not isinstance(element, IDSSlice) + + def test_slice_indexing_slice(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(10) + + slice_obj = cp.profiles_1d[2:8] + nested_slice = slice_obj[1:4] + assert isinstance(nested_slice, IDSSlice) + assert len(nested_slice) == 3 + + +class TestIDSSliceAttributeAccess: + + def test_attribute_access_nested_attributes(self, wall_with_units): + wall = wall_with_units + units = wall.description_2d[0].vessel.unit[8:] + + names = units.name + assert isinstance(names, IDSSlice) + assert len(names) == 4 + + units_full = wall.description_2d[0].vessel.unit + elements = units_full[:].element + assert isinstance(elements, IDSSlice) + + +class TestIDSSliceRepr: + + def test_repr_count_display(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(10) + + slice_obj = cp.profiles_1d[5:6] + repr_str = repr(slice_obj) + assert "IDSSlice" in repr_str + assert "1 match" in repr_str + + slice_obj = cp.profiles_1d[5:8] + repr_str = repr(slice_obj) + assert "IDSSlice" in repr_str + assert "3 matches" in repr_str + + +class TestIDSSliceValidation: + + def test_validate_slice(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(2) + cp.ids_properties.homogeneous_time = 1 + + slice_obj = cp.profiles_1d[:] + assert isinstance(slice_obj, IDSSlice) + + +class TestIDSSliceHash: + + def test_xxhash(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(5) + + slice_obj = cp.profiles_1d[:] + hash_bytes = slice_obj._xxhash() + assert isinstance(hash_bytes, bytes) + assert len(hash_bytes) > 0 + + +class TestWallExampleSlicing: + + def test_wall_units_nested_element_access(self, wall_with_units): + wall = wall_with_units + units = wall.description_2d[0].vessel.unit + + units_slice = units[8:] + assert isinstance(units_slice, IDSSlice) + assert len(units_slice) == 4 + + elements_slice = units_slice.element + assert isinstance(elements_slice, IDSSlice) + + +class TestEdgeCases: + + def test_slice_empty_array(self): + cp = IDSFactory("3.39.0").core_profiles() + + result = cp.profiles_1d[:] + assert isinstance(result, IDSSlice) + assert len(result) == 0 + + def test_slice_single_element(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(1) + + result = cp.profiles_1d[:] + assert isinstance(result, IDSSlice) + assert len(result) == 1 + + def test_invalid_step_zero(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(10) + + with pytest.raises(ValueError): + cp.profiles_1d[::0] + + +class TestFlatten: + + def test_flatten_basic_and_partial(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(3) + + for profile in cp.profiles_1d: + profile.ion.resize(5) + + slice_obj = cp.profiles_1d[:].ion + flattened = slice_obj.flatten() + assert isinstance(flattened, IDSSlice) + assert len(flattened) == 15 + + cp2 = IDSFactory("3.39.0").core_profiles() + cp2.profiles_1d.resize(4) + for profile in cp2.profiles_1d: + profile.ion.resize(3) + flattened2 = cp2.profiles_1d[:2].ion.flatten() + assert len(flattened2) == 6 + + def test_flatten_empty_and_single(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(2) + empty_flattened = cp.profiles_1d[:].ion.flatten() + assert len(empty_flattened) == 0 + + cp2 = IDSFactory("3.39.0").core_profiles() + cp2.profiles_1d.resize(1) + cp2.profiles_1d[0].ion.resize(4) + single_flattened = cp2.profiles_1d[:].ion.flatten() + assert len(single_flattened) == 4 + + def test_flatten_indexing_and_slicing(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(2) + + for i, profile in enumerate(cp.profiles_1d): + profile.ion.resize(3) + for j, ion in enumerate(profile.ion): + ion.label = f"ion_{i}_{j}" + + flattened = cp.profiles_1d[:].ion.flatten() + + assert flattened[0].label == "ion_0_0" + assert flattened[3].label == "ion_1_0" + + subset = flattened[1:4] + assert isinstance(subset, IDSSlice) + assert len(subset) == 3 + labels = [ion.label for ion in subset] + assert labels == ["ion_0_1", "ion_0_2", "ion_1_0"] + + def test_flatten_repr_and_path(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(2) + for profile in cp.profiles_1d: + profile.ion.resize(2) + + flattened = cp.profiles_1d[:].ion.flatten() + repr_str = repr(flattened) + + assert "IDSSlice" in repr_str + assert "4 matches" in repr_str + assert ".flatten()" in flattened._path + + def test_flatten_complex_case(self, wall_with_units): + wall = wall_with_units + units = wall.description_2d[0].vessel.unit[:5] + + all_elements = units.element.flatten() + assert len(all_elements) == 4 + 2 + 3 + 3 + 3 + + +class TestVaryingArraySizeIndexing: + + def test_unit_slice_element_integer_indexing(self, wall_varying_sizes): + units = wall_varying_sizes.description_2d[0].vessel.unit + units_slice = units[:2] + element_slice = units_slice.element + + with pytest.raises(IndexError): + element_slice[2] + + def test_unit_slice_element_safe_indexing_scenarios(self, wall_varying_sizes): + units = wall_varying_sizes.description_2d[0].vessel.unit + units_slice = units[:2] + + result = safe_element_lookup(units_slice, 1) + assert len(result["collected"]) == 2 + assert result["collected"] == ["element-0-1", "element-1-1"] + + result = safe_element_lookup(units_slice, 2) + assert len(result["collected"]) == 1 + assert result["skipped_units"] == [1] + + result = safe_element_lookup(units_slice, 4) + assert len(result["collected"]) == 0 + assert result["skipped_units"] == [0, 1] + + def test_unit_slice_element_individual_access(self, wall_varying_sizes): + units = wall_varying_sizes.description_2d[0].vessel.unit + element_slice = units[:2].element + + array_0 = element_slice[0] + assert len(array_0) == 4 + assert array_0[2].name.value == "element-0-2" + + array_1 = element_slice[1] + assert len(array_1) == 2 + + with pytest.raises(IndexError): + array_1[2] + + def test_wall_with_diverse_element_counts(self): + wall = create_wall_with_units(total_units=5, element_counts=[3, 1, 4, 2, 5]) + + units = wall.description_2d[0].vessel.unit + units_slice = units[:3] + element_slice = units_slice.element + + assert len(element_slice[0]) == 3 + assert len(element_slice[1]) == 1 + assert len(element_slice[2]) == 4 + + result = safe_element_lookup(units_slice, 2) + assert len(result["collected"]) == 2 + assert result["skipped_units"] == [1] + + +class TestIDSSliceValues: + + def test_values_basic_extraction(self, wall_with_units): + wall = wall_with_units + units = wall.description_2d[0].vessel.unit + + names_slice = units[:].name + names = names_slice.values() + + assert isinstance(names, list) + assert len(names) == 12 + assert all(isinstance(name, str) and name.startswith("unit-") for name in names) + assert names == [f"unit-{i}" for i in range(12)] + + def test_values_integer_and_float_extraction(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(3) + + for profile in cp.profiles_1d: + profile.ion.resize(2) + for i, ion in enumerate(profile.ion): + ion.neutral_index = i + ion.z_ion = float(i + 1) + + ions = cp.profiles_1d[:].ion.flatten() + indices = ions[:].neutral_index.values() + assert all(isinstance(idx, (int, np.integer)) for idx in indices) + + z_values = ions[:].z_ion.values() + assert all(isinstance(z, (float, np.floating)) for z in z_values) + + def test_values_partial_and_empty_slices(self, wall_with_units): + wall = wall_with_units + units = wall.description_2d[0].vessel.unit + + names = units[:5].name.values() + assert len(names) == 5 + assert names == [f"unit-{i}" for i in range(5)] + + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(5) + empty_values = cp.profiles_1d[5:10].label.values() + assert len(empty_values) == 0 + + def test_values_with_step_and_negative_indices(self, wall_with_units): + wall = wall_with_units + units = wall.description_2d[0].vessel.unit + + names_step = units[::2].name.values() + assert len(names_step) == 6 + assert names_step == [f"unit-{i}" for i in range(0, 12, 2)] + + names_neg = units[-3:].name.values() + assert len(names_neg) == 3 + assert names_neg == [f"unit-{i}" for i in range(9, 12)] + + def test_values_structure_preservation(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(3) + + for profile in cp.profiles_1d: + profile.ion.resize(2) + + ions = cp.profiles_1d[:].ion.flatten().values() + + assert len(ions) == 6 + for ion in ions: + assert hasattr(ion, "_path") + from imas.ids_primitive import IDSPrimitive + + assert not isinstance(ion, IDSPrimitive) + + def test_values_array_primitives(self): + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(2) + + cp.profiles_1d[0].grid.psi = np.linspace(0, 1, 10) + cp.profiles_1d[1].grid.psi = np.linspace(1, 2, 10) + + psi_values = cp.profiles_1d[:].grid.psi.values() + + assert len(psi_values) == 2 + assert all(isinstance(psi, np.ndarray) for psi in psi_values) + + def test_values_consistency_with_iteration(self, wall_with_units): + wall = wall_with_units + units = wall.description_2d[0].vessel.unit + + names_via_values = units[:5].name.values() + + names_via_iteration = [unit.name.value for unit in units[:5]] + + assert names_via_values == names_via_iteration From 67f24c66e5f8d4922cdf5fe4a604b28b24322b69 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Thu, 6 Nov 2025 16:34:42 +0100 Subject: [PATCH 05/44] removed flake8 issue --- imas/ids_struct_array.py | 1 - 1 file changed, 1 deletion(-) diff --git a/imas/ids_struct_array.py b/imas/ids_struct_array.py index 8495bb3..56e0706 100644 --- a/imas/ids_struct_array.py +++ b/imas/ids_struct_array.py @@ -4,7 +4,6 @@ """ import logging -import sys from copy import deepcopy from typing import Optional, Tuple From c1a5fba4c4c531cbe17845c7a7ca718db40a614e Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 7 Nov 2025 10:05:10 +0100 Subject: [PATCH 06/44] updated documentation and examples of using IDSSlice --- docs/source/api.rst | 1 + docs/source/array_slicing.rst | 81 ++++++++++++++++++++++++ docs/source/courses/advanced/explore.rst | 26 ++++++++ docs/source/imas_architecture.rst | 6 ++ imas/ids_struct_array.py | 4 ++ 5 files changed, 118 insertions(+) create mode 100644 docs/source/array_slicing.rst diff --git a/docs/source/api.rst b/docs/source/api.rst index 5df6e57..0eaa3ed 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -19,4 +19,5 @@ IMAS-Python IDS manipulation ids_toplevel.IDSToplevel ids_primitive.IDSPrimitive ids_structure.IDSStructure + ids_slice.IDSSlice ids_struct_array.IDSStructArray diff --git a/docs/source/array_slicing.rst b/docs/source/array_slicing.rst new file mode 100644 index 0000000..122b566 --- /dev/null +++ b/docs/source/array_slicing.rst @@ -0,0 +1,81 @@ +Array Slicing +============== + +The ``IDSStructArray`` class supports Python's standard slicing syntax. + +Key Difference +--------------- + +- ``array[0]`` returns ``IDSStructure`` (single element) +- ``array[:]`` or ``array[1:5]`` returns ``IDSSlice`` (collection with ``flatten()`` and ``values()``) + +Basic Usage +----------- + +.. code-block:: python + + import imas + + entry = imas.DBEntry("imas:hdf5?path=my-testdb") + cp = entry.get("core_profiles") + + # Integer indexing + first = cp.profiles_1d[0] # IDSStructure + last = cp.profiles_1d[-1] # IDSStructure + + # Slice operations + subset = cp.profiles_1d[1:5] # IDSSlice + every_other = cp.profiles_1d[::2] # IDSSlice + + # Flatten nested arrays + all_ions = cp.profiles_1d[:].ion[:].flatten() # IDSSlice of individual ions + + # Extract values + labels = all_ions.label.values() + +Common Patterns +--------------- + +**Process a range:** + +.. code-block:: python + + for element in cp.profiles_1d[5:10]: + print(element.time) + +**Flatten and iterate:** + +.. code-block:: python + + for ion in cp.profiles_1d[:].ion[:].flatten(): + print(ion.label.value) + +**Get all values:** + +.. code-block:: python + + times = cp.profiles_1d[:].time.values() + +Important Constraint +-------------------- + +When accessing attributes through a slice, all elements must have that attribute. +If elements are ``IDSStructArray`` objects, flatten first: + +.. code-block:: python + + # Fails - IDSStructArray has no 'label' attribute + # cp.profiles_1d[:].ion[:].label + + # Correct - flatten first + labels = cp.profiles_1d[:].ion[:].flatten().label.values() + +Lazy-Loaded Arrays +------------------- + +Individual indexing works with lazy loading, but slicing doesn't: + +.. code-block:: python + + element = lazy_array[0] # OK - loads on demand + subset = lazy_array[1:5] # ValueError diff --git a/docs/source/courses/advanced/explore.rst b/docs/source/courses/advanced/explore.rst index 7b383bc..f16d254 100644 --- a/docs/source/courses/advanced/explore.rst +++ b/docs/source/courses/advanced/explore.rst @@ -72,6 +72,32 @@ structures (modeled by :py:class:`~imas.ids_struct_array.IDSStructArray`) are (a name applies) arrays containing :py:class:`~imas.ids_structure.IDSStructure`\ s. Data nodes can contain scalar or array data of various types. +**Slicing Arrays of Structures** + +Arrays of structures support Python slice notation, which returns an +:py:class:`~imas.ids_slice.IDSSlice` object containing matched elements: + +.. code-block:: python + + import imas + + core_profiles = imas.IDSFactory().core_profiles() + core_profiles.profiles_1d.resize(10) # Create 10 profiles + + # Integer indexing returns a single structure + first = core_profiles.profiles_1d[0] + + # Slice notation returns an IDSSlice + subset = core_profiles.profiles_1d[2:5] # Elements 2, 3, 4 + every_other = core_profiles.profiles_1d[::2] # Every second element + + # IDSSlice supports flatten() and values() for convenient data access + all_ions = core_profiles.profiles_1d[:].ion[:].flatten() + for ion in all_ions: + print(ion.label.value) + +For detailed information on slicing operations, see :doc:`../../array_slicing`. + Some methods and properties are defined for all data nodes and arrays of structures: ``len()`` diff --git a/docs/source/imas_architecture.rst b/docs/source/imas_architecture.rst index 182d2a0..6c61321 100644 --- a/docs/source/imas_architecture.rst +++ b/docs/source/imas_architecture.rst @@ -168,6 +168,12 @@ The following submodules and classes represent IDS nodes. :py:class:`~imas.ids_struct_array.IDSStructArray` class, which models Arrays of Structures. It also contains some :ref:`dev lazy loading` logic. +- :py:mod:`imas.ids_slice` contains the + :py:class:`~imas.ids_slice.IDSSlice` class, which represents a collection of IDS + nodes matching a slice expression. It provides slicing operations on + :py:class:`~imas.ids_struct_array.IDSStructArray` elements and supports operations + like ``flatten()`` and ``values()`` for accessing matched elements. + - :py:mod:`imas.ids_structure` contains the :py:class:`~imas.ids_structure.IDSStructure` class, which models Structures. It contains the :ref:`lazy instantiation` logic and some of the :ref:`dev lazy loading` diff --git a/imas/ids_struct_array.py b/imas/ids_struct_array.py index 56e0706..766dbc1 100644 --- a/imas/ids_struct_array.py +++ b/imas/ids_struct_array.py @@ -149,6 +149,10 @@ def __getitem__(self, item): A single IDSStructure if item is an int, or an IDSSlice if item is a slice """ if isinstance(item, slice): + # Slicing is not supported on lazy-loaded arrays + if self._lazy: + raise ValueError("Lazy-loaded IDSs do not support slicing. Use integer indexing instead.") + # Handle slice by returning an IDSSlice from imas.ids_slice import IDSSlice From a1091ed597d337dece4e61fd86b500744cc8be8c Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Wed, 12 Nov 2025 09:23:23 +0100 Subject: [PATCH 07/44] fixed black formatting --- imas/ids_struct_array.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/imas/ids_struct_array.py b/imas/ids_struct_array.py index 766dbc1..347ab0b 100644 --- a/imas/ids_struct_array.py +++ b/imas/ids_struct_array.py @@ -151,8 +151,10 @@ def __getitem__(self, item): if isinstance(item, slice): # Slicing is not supported on lazy-loaded arrays if self._lazy: - raise ValueError("Lazy-loaded IDSs do not support slicing. Use integer indexing instead.") - + raise ValueError( + "Lazy-loaded IDSs do not support slicing. Use integer indexing instead." + ) + # Handle slice by returning an IDSSlice from imas.ids_slice import IDSSlice From f30d305e6c98864aeb375d399fdc0acded2765c7 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Wed, 12 Nov 2025 09:26:09 +0100 Subject: [PATCH 08/44] fix flake8 issue --- imas/ids_struct_array.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/imas/ids_struct_array.py b/imas/ids_struct_array.py index 347ab0b..fcf9256 100644 --- a/imas/ids_struct_array.py +++ b/imas/ids_struct_array.py @@ -152,7 +152,8 @@ def __getitem__(self, item): # Slicing is not supported on lazy-loaded arrays if self._lazy: raise ValueError( - "Lazy-loaded IDSs do not support slicing. Use integer indexing instead." + "Lazy-loaded IDSs do not support slicing. " + "Use integer indexing instead." ) # Handle slice by returning an IDSSlice From 674c25f2aabf0d682dd8bb269aee6947f8c66c11 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Wed, 12 Nov 2025 09:29:33 +0100 Subject: [PATCH 09/44] add array_slicing in the documentation --- docs/source/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/index.rst b/docs/source/index.rst index 8388f5b..7b8f98f 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -50,6 +50,7 @@ Manual configuring cli netcdf + array_slicing changelog examples From 4cf59379f75f66dfe57e10a531fb67532d807dc2 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 21 Nov 2025 11:27:30 +0100 Subject: [PATCH 10/44] Fix per review comment https://github.com/iterorganization/IMAS-Python/pull/20#discussion_r2522794887 --- imas/ids_slice.py | 62 +++++-------------------------------- imas/ids_struct_array.py | 1 - imas/test/test_ids_slice.py | 12 +++---- 3 files changed, 12 insertions(+), 63 deletions(-) diff --git a/imas/ids_slice.py b/imas/ids_slice.py index b561c23..5887d2a 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -10,13 +10,10 @@ import logging from typing import Any, Iterator, List, Union - -from imas.ids_base import IDSBase - logger = logging.getLogger(__name__) -class IDSSlice(IDSBase): +class IDSSlice: """Represents a slice of IDS struct array elements. When slicing an IDSStructArray, instead of returning a regular Python list, @@ -27,51 +24,39 @@ class IDSSlice(IDSBase): - Iteration over matched elements """ - __slots__ = ["_parent", "metadata", "_matched_elements", "_slice_path", "_lazy"] + __slots__ = ["metadata", "_matched_elements", "_slice_path"] def __init__( self, - parent: IDSBase, metadata: Any, - matched_elements: List[IDSBase], + matched_elements: List[Any], slice_path: str, ): """Initialize IDSSlice. Args: - parent: The parent IDSStructArray that was sliced metadata: Metadata from the parent array matched_elements: List of elements that matched the slice slice_path: String representation of the slice operation (e.g., "[8:]") """ - self._parent = parent self.metadata = metadata self._matched_elements = matched_elements self._slice_path = slice_path - self._lazy = parent._lazy - - @property - def _toplevel(self): - """Return the toplevel instance this node belongs to""" - return self._parent._toplevel @property def _path(self) -> str: - """Build the path to this slice. - - The path includes the parent's path plus the slice operation. - """ - return self._parent._path + self._slice_path + """Return the path representation of this slice.""" + return self._slice_path def __len__(self) -> int: """Return the number of elements matched by this slice.""" return len(self._matched_elements) - def __iter__(self) -> Iterator[IDSBase]: + def __iter__(self) -> Iterator[Any]: """Iterate over all matched elements.""" return iter(self._matched_elements) - def __getitem__(self, item: Union[int, slice]) -> Union[IDSBase, "IDSSlice"]: + def __getitem__(self, item: Union[int, slice]) -> Union[Any, "IDSSlice"]: """Get element(s) from the slice. Args: @@ -95,7 +80,6 @@ def __getitem__(self, item: Union[int, slice]) -> Union[IDSBase, "IDSSlice"]: new_path = self._slice_path + slice_str return IDSSlice( - self._parent, self.metadata, sliced_elements, new_path, @@ -134,7 +118,6 @@ def __getattr__(self, name: str) -> "IDSSlice": new_path = self._slice_path + "." + name return IDSSlice( - self._parent, None, # metadata is not directly applicable to the child child_elements, new_path, @@ -142,23 +125,9 @@ def __getattr__(self, name: str) -> "IDSSlice": def __repr__(self) -> str: """Build a string representation of this slice.""" - toplevel_name = self._toplevel.metadata.name matches_count = len(self._matched_elements) match_word = "match" if matches_count == 1 else "matches" - return ( - f"" - ) - - def _build_repr_start(self) -> str: - """Build the start of the string representation. - - This is used for consistency with other IDS node types. - """ - return ( - f"<{type(self).__name__} (IDS:{self._toplevel.metadata.name}, {self._path}" - ) + return f"" def values(self) -> List[Any]: """Extract raw values from elements in this slice. @@ -241,7 +210,6 @@ def flatten(self, recursive: bool = False) -> "IDSSlice": new_path = self._slice_path + ".flatten()" return IDSSlice( - self._parent, None, flattened, new_path, @@ -265,17 +233,3 @@ def _format_slice(slice_obj: slice) -> str: return f"[{start}:{stop}:{step}]" else: return f"[{start}:{stop}]" - - def _validate(self) -> None: - """Validate all matched elements.""" - for element in self._matched_elements: - element._validate() - - def _xxhash(self) -> bytes: - """Compute hash of all matched elements.""" - from xxhash import xxh3_64 - - hsh = xxh3_64(len(self._matched_elements).to_bytes(8, "little")) - for element in self._matched_elements: - hsh.update(element._xxhash()) - return hsh.digest() diff --git a/imas/ids_struct_array.py b/imas/ids_struct_array.py index fcf9256..be3f2b7 100644 --- a/imas/ids_struct_array.py +++ b/imas/ids_struct_array.py @@ -168,7 +168,6 @@ def __getitem__(self, item): slice_str = self._format_slice(item) return IDSSlice( - self, self.metadata, matched_elements, slice_str, diff --git a/imas/test/test_ids_slice.py b/imas/test/test_ids_slice.py index fb268c4..6bea7b5 100644 --- a/imas/test/test_ids_slice.py +++ b/imas/test/test_ids_slice.py @@ -110,7 +110,7 @@ def test_slice_path_representation(self): cp.profiles_1d.resize(10) result = cp.profiles_1d[5:8] - expected_path = "profiles_1d[5:8]" + expected_path = "[5:8]" assert expected_path in result._path result = cp.profiles_1d[5:8][1:3] @@ -204,13 +204,9 @@ def test_validate_slice(self): class TestIDSSliceHash: def test_xxhash(self): - cp = IDSFactory("3.39.0").core_profiles() - cp.profiles_1d.resize(5) - - slice_obj = cp.profiles_1d[:] - hash_bytes = slice_obj._xxhash() - assert isinstance(hash_bytes, bytes) - assert len(hash_bytes) > 0 + # _xxhash method removed from IDSSlice as it's not needed + # (IDSSlice is not an IDSBase subclass) + pass class TestWallExampleSlicing: From c57e64e9a9086a45db8b4276ddf30f2d6a1133e0 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 21 Nov 2025 11:29:19 +0100 Subject: [PATCH 11/44] Fix as per review comment https://github.com/iterorganization/IMAS-Python/pull/20#discussion_r2522808013 --- imas/ids_slice.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/imas/ids_slice.py b/imas/ids_slice.py index 5887d2a..92c9004 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -64,10 +64,6 @@ def __getitem__(self, item: Union[int, slice]) -> Union[Any, "IDSSlice"]: Returns: A single element if item is an int, or an IDSSlice if item is a slice - - Raises: - IndexError: If the index is out of range - AttributeError: If trying to index into elements that aren't indexable """ if isinstance(item, slice): # Further slice the matched elements @@ -99,9 +95,6 @@ def __getattr__(self, name: str) -> "IDSSlice": Returns: A new IDSSlice containing the child attribute from each matched element - - Raises: - AttributeError: If the attribute doesn't exist """ # Avoid issues with special attributes if name.startswith("_"): From c470bec28c21e9284aacb4111d9711ef8e645167 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 21 Nov 2025 11:30:45 +0100 Subject: [PATCH 12/44] Fix as per review comment https://github.com/iterorganization/IMAS-Python/pull/20#discussion_r2522812979 --- imas/ids_slice.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/imas/ids_slice.py b/imas/ids_slice.py index 92c9004..4900174 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -68,8 +68,6 @@ def __getitem__(self, item: Union[int, slice]) -> Union[Any, "IDSSlice"]: if isinstance(item, slice): # Further slice the matched elements sliced_elements = self._matched_elements[item] - if not isinstance(sliced_elements, list): - sliced_elements = [sliced_elements] # Build the slice path representation slice_str = self._format_slice(item) From 156adedcb0f79d1ef9d746aad2f4d7f18bafc8bf Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 21 Nov 2025 11:32:47 +0100 Subject: [PATCH 13/44] Fix as per review comment https://github.com/iterorganization/IMAS-Python/pull/20#discussion_r2522819968 --- imas/ids_slice.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/imas/ids_slice.py b/imas/ids_slice.py index 4900174..5782c35 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -99,11 +99,7 @@ def __getattr__(self, name: str) -> "IDSSlice": raise AttributeError(f"IDSSlice has no attribute '{name}'") # Access the attribute on each element - child_elements = [] - - for element in self._matched_elements: - child = getattr(element, name) - child_elements.append(child) + child_elements = [getattr(element, name) for element in self] # Build the new path including the attribute access new_path = self._slice_path + "." + name From 87158d23766cb1cf50011f7419506aa1618c7d16 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 21 Nov 2025 11:36:53 +0100 Subject: [PATCH 14/44] Fix as per review comment https://github.com/iterorganization/IMAS-Python/pull/20#discussion_r2522827336 --- imas/ids_slice.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/imas/ids_slice.py b/imas/ids_slice.py index 5782c35..c8104a5 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -105,7 +105,7 @@ def __getattr__(self, name: str) -> "IDSSlice": new_path = self._slice_path + "." + name return IDSSlice( - None, # metadata is not directly applicable to the child + self.metadata, child_elements, new_path, ) @@ -197,7 +197,7 @@ def flatten(self, recursive: bool = False) -> "IDSSlice": new_path = self._slice_path + ".flatten()" return IDSSlice( - None, + self.metadata, flattened, new_path, ) From 94e20f2a4babaa7bb64715263a2cb0c15158675c Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 21 Nov 2025 11:40:40 +0100 Subject: [PATCH 15/44] Fix as per review comment https://github.com/iterorganization/IMAS-Python/pull/20#discussion_r2522854256 --- imas/ids_slice.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/imas/ids_slice.py b/imas/ids_slice.py index c8104a5..f49492b 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -94,9 +94,13 @@ def __getattr__(self, name: str) -> "IDSSlice": Returns: A new IDSSlice containing the child attribute from each matched element """ - # Avoid issues with special attributes - if name.startswith("_"): - raise AttributeError(f"IDSSlice has no attribute '{name}'") + # Try to get child metadata if available + child_metadata = None + if self.metadata is not None: + try: + child_metadata = self.metadata[name] + except (KeyError, TypeError): + raise AttributeError(f"IDSSlice has no attribute '{name}'") from None # Access the attribute on each element child_elements = [getattr(element, name) for element in self] @@ -105,7 +109,7 @@ def __getattr__(self, name: str) -> "IDSSlice": new_path = self._slice_path + "." + name return IDSSlice( - self.metadata, + child_metadata, child_elements, new_path, ) From 83985376349d4080ce42b03edfce146c87a95be7 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 21 Nov 2025 11:42:38 +0100 Subject: [PATCH 16/44] Fix as per review comment https://github.com/iterorganization/IMAS-Python/pull/20#discussion_r2522857826 --- imas/ids_slice.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/imas/ids_slice.py b/imas/ids_slice.py index f49492b..dc02f5e 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -10,6 +10,8 @@ import logging from typing import Any, Iterator, List, Union +from imas.ids_metadata import IDSMetadata + logger = logging.getLogger(__name__) @@ -28,7 +30,7 @@ class IDSSlice: def __init__( self, - metadata: Any, + metadata: IDSMetadata, matched_elements: List[Any], slice_path: str, ): @@ -100,7 +102,7 @@ def __getattr__(self, name: str) -> "IDSSlice": try: child_metadata = self.metadata[name] except (KeyError, TypeError): - raise AttributeError(f"IDSSlice has no attribute '{name}'") from None + pass # Access the attribute on each element child_elements = [getattr(element, name) for element in self] From 18d7fd0173787d8c629661dc4abf70912ff3126c Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 21 Nov 2025 18:01:53 +0100 Subject: [PATCH 17/44] Fix as per review comment https://github.com/iterorganization/IMAS-Python/pull/20#discussion_r2522885923 --- imas/ids_slice.py | 69 ++++++++++++++++++++++++++++--------- imas/test/test_ids_slice.py | 29 +++++++++++----- 2 files changed, 72 insertions(+), 26 deletions(-) diff --git a/imas/ids_slice.py b/imas/ids_slice.py index dc02f5e..946bba1 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -8,7 +8,7 @@ """ import logging -from typing import Any, Iterator, List, Union +from typing import Any, Iterator, List, Optional, Union from imas.ids_metadata import IDSMetadata @@ -30,7 +30,7 @@ class IDSSlice: def __init__( self, - metadata: IDSMetadata, + metadata: Optional[IDSMetadata], matched_elements: List[Any], slice_path: str, ): @@ -67,22 +67,57 @@ def __getitem__(self, item: Union[int, slice]) -> Union[Any, "IDSSlice"]: Returns: A single element if item is an int, or an IDSSlice if item is a slice """ - if isinstance(item, slice): - # Further slice the matched elements - sliced_elements = self._matched_elements[item] - - # Build the slice path representation - slice_str = self._format_slice(item) - new_path = self._slice_path + slice_str - - return IDSSlice( - self.metadata, - sliced_elements, - new_path, - ) + from imas.ids_struct_array import IDSStructArray + + # Check if matched elements are IDSStructArray - if so, apply indexing to each array + if self._matched_elements and isinstance(self._matched_elements[0], IDSStructArray): + if isinstance(item, slice): + # Apply the slice to each array and collect all results + sliced_elements = [] + for array in self._matched_elements: + sliced_elements.extend(list(array[item])) + + # Build the slice path representation + slice_str = self._format_slice(item) + new_path = self._slice_path + slice_str + + return IDSSlice( + self.metadata, + sliced_elements, + new_path, + ) + else: + # Apply integer index to each array + indexed_elements = [] + for array in self._matched_elements: + indexed_elements.append(array[item]) + + # Build the index path representation + new_path = self._slice_path + f"[{item}]" + + return IDSSlice( + self.metadata, + indexed_elements, + new_path, + ) else: - # Return a single element by index - return self._matched_elements[int(item)] + # Normal slice behavior for non-array elements + if isinstance(item, slice): + # Further slice the matched elements themselves + sliced_elements = self._matched_elements[item] + + # Build the slice path representation + slice_str = self._format_slice(item) + new_path = self._slice_path + slice_str + + return IDSSlice( + self.metadata, + sliced_elements, + new_path, + ) + else: + # Return a single element by index + return self._matched_elements[int(item)] def __getattr__(self, name: str) -> "IDSSlice": """Access a child attribute on all matched elements. diff --git a/imas/test/test_ids_slice.py b/imas/test/test_ids_slice.py index 6bea7b5..6612fe3 100644 --- a/imas/test/test_ids_slice.py +++ b/imas/test/test_ids_slice.py @@ -352,15 +352,20 @@ def test_unit_slice_element_individual_access(self, wall_varying_sizes): units = wall_varying_sizes.description_2d[0].vessel.unit element_slice = units[:2].element - array_0 = element_slice[0] - assert len(array_0) == 4 - assert array_0[2].name.value == "element-0-2" + # With new behavior, element_slice[0] gets first element from each array + first_from_each = element_slice[0] + assert isinstance(first_from_each, IDSSlice) + assert len(first_from_each) == 2 # 2 units, so 2 first elements - array_1 = element_slice[1] - assert len(array_1) == 2 + # To access individual arrays, use iteration + arrays = list(element_slice) + assert len(arrays[0]) == 4 + assert arrays[0][2].name.value == "element-0-2" + + assert len(arrays[1]) == 2 with pytest.raises(IndexError): - array_1[2] + arrays[1][2] def test_wall_with_diverse_element_counts(self): wall = create_wall_with_units(total_units=5, element_counts=[3, 1, 4, 2, 5]) @@ -369,9 +374,15 @@ def test_wall_with_diverse_element_counts(self): units_slice = units[:3] element_slice = units_slice.element - assert len(element_slice[0]) == 3 - assert len(element_slice[1]) == 1 - assert len(element_slice[2]) == 4 + # With new behavior, element_slice[0] gets first element from each array + first_from_each = element_slice[0] + assert isinstance(first_from_each, IDSSlice) + assert len(first_from_each) == 3 # 3 units + + # To access individual arrays, use iteration + arrays = list(element_slice) + assert len(arrays[0]) == 3 + assert len(arrays[2]) == 4 result = safe_element_lookup(units_slice, 2) assert len(result["collected"]) == 2 From 989513dc72006a1b83637e52b4820440c8fdcbaf Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 21 Nov 2025 18:12:59 +0100 Subject: [PATCH 18/44] Fix as per review comment https://github.com/iterorganization/IMAS-Python/pull/20#discussion_r2522889488 --- imas/ids_slice.py | 48 ------------------------------------- imas/test/test_ids_slice.py | 20 ++++++++-------- 2 files changed, 10 insertions(+), 58 deletions(-) diff --git a/imas/ids_slice.py b/imas/ids_slice.py index 946bba1..917ffb2 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -195,54 +195,6 @@ def values(self) -> List[Any]: result.append(element) return result - def flatten(self, recursive: bool = False) -> "IDSSlice": - """Flatten nested arrays into a single IDSSlice. - - This method is useful for MATLAB-style matrix-like access. - It flattens matched elements that are themselves iterable - (such as IDSStructArray) into a single flat IDSSlice. - - Args: - recursive: If True, recursively flatten nested IDSSlices. - If False (default), only flatten one level. - - Returns: - New IDSSlice with flattened elements - - Examples: - >>> # Get all ions from 2 profiles as a flat list - >>> all_ions = cp.profiles_1d[:2].ion.flatten() - >>> len(all_ions) # Number of total ions - 10 - >>> # Iterate over all ions - >>> for ion in all_ions: - ... print(ion.label) - - >>> # Flatten recursively for deeply nested structures - >>> deeply_nested = obj.level1[:].level2[:].flatten(recursive=True) - """ - from imas.ids_struct_array import IDSStructArray - - flattened = [] - - for element in self._matched_elements: - if isinstance(element, IDSStructArray): - # Flatten IDSStructArray elements - flattened.extend(list(element)) - elif recursive and isinstance(element, IDSSlice): - # Recursively flatten nested IDSSlices - flattened.extend(list(element.flatten(recursive=True))) - else: - # Keep non-array elements as-is - flattened.append(element) - - new_path = self._slice_path + ".flatten()" - return IDSSlice( - self.metadata, - flattened, - new_path, - ) - @staticmethod def _format_slice(slice_obj: slice) -> str: """Format a slice object as a string. diff --git a/imas/test/test_ids_slice.py b/imas/test/test_ids_slice.py index 6612fe3..318e1cc 100644 --- a/imas/test/test_ids_slice.py +++ b/imas/test/test_ids_slice.py @@ -258,7 +258,7 @@ def test_flatten_basic_and_partial(self): profile.ion.resize(5) slice_obj = cp.profiles_1d[:].ion - flattened = slice_obj.flatten() + flattened = slice_obj[:] assert isinstance(flattened, IDSSlice) assert len(flattened) == 15 @@ -266,19 +266,19 @@ def test_flatten_basic_and_partial(self): cp2.profiles_1d.resize(4) for profile in cp2.profiles_1d: profile.ion.resize(3) - flattened2 = cp2.profiles_1d[:2].ion.flatten() + flattened2 = cp2.profiles_1d[:2].ion[:] assert len(flattened2) == 6 def test_flatten_empty_and_single(self): cp = IDSFactory("3.39.0").core_profiles() cp.profiles_1d.resize(2) - empty_flattened = cp.profiles_1d[:].ion.flatten() + empty_flattened = cp.profiles_1d[:].ion[:] assert len(empty_flattened) == 0 cp2 = IDSFactory("3.39.0").core_profiles() cp2.profiles_1d.resize(1) cp2.profiles_1d[0].ion.resize(4) - single_flattened = cp2.profiles_1d[:].ion.flatten() + single_flattened = cp2.profiles_1d[:].ion[:] assert len(single_flattened) == 4 def test_flatten_indexing_and_slicing(self): @@ -290,7 +290,7 @@ def test_flatten_indexing_and_slicing(self): for j, ion in enumerate(profile.ion): ion.label = f"ion_{i}_{j}" - flattened = cp.profiles_1d[:].ion.flatten() + flattened = cp.profiles_1d[:].ion[:] assert flattened[0].label == "ion_0_0" assert flattened[3].label == "ion_1_0" @@ -307,18 +307,18 @@ def test_flatten_repr_and_path(self): for profile in cp.profiles_1d: profile.ion.resize(2) - flattened = cp.profiles_1d[:].ion.flatten() + flattened = cp.profiles_1d[:].ion[:] repr_str = repr(flattened) assert "IDSSlice" in repr_str assert "4 matches" in repr_str - assert ".flatten()" in flattened._path + assert "[:]" in flattened._path def test_flatten_complex_case(self, wall_with_units): wall = wall_with_units units = wall.description_2d[0].vessel.unit[:5] - all_elements = units.element.flatten() + all_elements = units.element[:] assert len(all_elements) == 4 + 2 + 3 + 3 + 3 @@ -413,7 +413,7 @@ def test_values_integer_and_float_extraction(self): ion.neutral_index = i ion.z_ion = float(i + 1) - ions = cp.profiles_1d[:].ion.flatten() + ions = cp.profiles_1d[:].ion[:] indices = ions[:].neutral_index.values() assert all(isinstance(idx, (int, np.integer)) for idx in indices) @@ -452,7 +452,7 @@ def test_values_structure_preservation(self): for profile in cp.profiles_1d: profile.ion.resize(2) - ions = cp.profiles_1d[:].ion.flatten().values() + ions = cp.profiles_1d[:].ion[:].values() assert len(ions) == 6 for ion in ions: From 3939e551a39c2456a2b201cd9c014bfa43238131 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 21 Nov 2025 18:16:44 +0100 Subject: [PATCH 19/44] Fix as per review comment https://github.com/iterorganization/IMAS-Python/pull/20#discussion_r2522894537 --- imas/ids_struct_array.py | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/imas/ids_struct_array.py b/imas/ids_struct_array.py index be3f2b7..aba09ba 100644 --- a/imas/ids_struct_array.py +++ b/imas/ids_struct_array.py @@ -120,25 +120,6 @@ def _element_structure(self): struct = IDSStructure(self, self.metadata) return struct - @staticmethod - def _format_slice(slice_obj: slice) -> str: - """Format a slice object as a string. - - Args: - slice_obj: The slice object to format - - Returns: - String representation like "[1:5]", "[::2]", etc. - """ - start = slice_obj.start if slice_obj.start is not None else "" - stop = slice_obj.stop if slice_obj.stop is not None else "" - step = slice_obj.step if slice_obj.step is not None else "" - - if step: - return f"[{start}:{stop}:{step}]" - else: - return f"[{start}:{stop}]" - def __getitem__(self, item): """Get element(s) from the struct array. @@ -164,8 +145,8 @@ def __getitem__(self, item): if not isinstance(matched_elements, list): matched_elements = [matched_elements] - # Build the slice path representation - slice_str = self._format_slice(item) + # Build the slice path representation using IDSSlice's method + slice_str = IDSSlice._format_slice(item) return IDSSlice( self.metadata, From 2423a8901612547ec860c9a1e6fa0b84dfcb58ba Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 21 Nov 2025 18:19:33 +0100 Subject: [PATCH 20/44] Fix as per review comment https://github.com/iterorganization/IMAS-Python/pull/20#discussion_r2522897092 --- imas/ids_struct_array.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/imas/ids_struct_array.py b/imas/ids_struct_array.py index aba09ba..80587d5 100644 --- a/imas/ids_struct_array.py +++ b/imas/ids_struct_array.py @@ -142,8 +142,6 @@ def __getitem__(self, item): # Get the matched elements matched_elements = self.value[item] - if not isinstance(matched_elements, list): - matched_elements = [matched_elements] # Build the slice path representation using IDSSlice's method slice_str = IDSSlice._format_slice(item) From 69cb5663f048988efcfff77b9e76400920f50c2b Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 21 Nov 2025 18:28:58 +0100 Subject: [PATCH 21/44] formatting with black and flake8 --- imas/ids_slice.py | 9 ++++++--- imas/test/test_ids_slice.py | 20 ++++++++++---------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/imas/ids_slice.py b/imas/ids_slice.py index 917ffb2..2e1f6a7 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -68,9 +68,12 @@ def __getitem__(self, item: Union[int, slice]) -> Union[Any, "IDSSlice"]: A single element if item is an int, or an IDSSlice if item is a slice """ from imas.ids_struct_array import IDSStructArray - - # Check if matched elements are IDSStructArray - if so, apply indexing to each array - if self._matched_elements and isinstance(self._matched_elements[0], IDSStructArray): + + # Check if matched elements are IDSStructArray + # If so, apply indexing to each array + if self._matched_elements and isinstance( + self._matched_elements[0], IDSStructArray + ): if isinstance(item, slice): # Apply the slice to each array and collect all results sliced_elements = [] diff --git a/imas/test/test_ids_slice.py b/imas/test/test_ids_slice.py index 318e1cc..63bfaa8 100644 --- a/imas/test/test_ids_slice.py +++ b/imas/test/test_ids_slice.py @@ -258,7 +258,7 @@ def test_flatten_basic_and_partial(self): profile.ion.resize(5) slice_obj = cp.profiles_1d[:].ion - flattened = slice_obj[:] + flattened = slice_obj[:] assert isinstance(flattened, IDSSlice) assert len(flattened) == 15 @@ -266,19 +266,19 @@ def test_flatten_basic_and_partial(self): cp2.profiles_1d.resize(4) for profile in cp2.profiles_1d: profile.ion.resize(3) - flattened2 = cp2.profiles_1d[:2].ion[:] + flattened2 = cp2.profiles_1d[:2].ion[:] assert len(flattened2) == 6 def test_flatten_empty_and_single(self): cp = IDSFactory("3.39.0").core_profiles() cp.profiles_1d.resize(2) - empty_flattened = cp.profiles_1d[:].ion[:] + empty_flattened = cp.profiles_1d[:].ion[:] assert len(empty_flattened) == 0 cp2 = IDSFactory("3.39.0").core_profiles() cp2.profiles_1d.resize(1) cp2.profiles_1d[0].ion.resize(4) - single_flattened = cp2.profiles_1d[:].ion[:] + single_flattened = cp2.profiles_1d[:].ion[:] assert len(single_flattened) == 4 def test_flatten_indexing_and_slicing(self): @@ -290,7 +290,7 @@ def test_flatten_indexing_and_slicing(self): for j, ion in enumerate(profile.ion): ion.label = f"ion_{i}_{j}" - flattened = cp.profiles_1d[:].ion[:] + flattened = cp.profiles_1d[:].ion[:] assert flattened[0].label == "ion_0_0" assert flattened[3].label == "ion_1_0" @@ -307,18 +307,18 @@ def test_flatten_repr_and_path(self): for profile in cp.profiles_1d: profile.ion.resize(2) - flattened = cp.profiles_1d[:].ion[:] + flattened = cp.profiles_1d[:].ion[:] repr_str = repr(flattened) assert "IDSSlice" in repr_str assert "4 matches" in repr_str - assert "[:]" in flattened._path + assert "[:]" in flattened._path def test_flatten_complex_case(self, wall_with_units): wall = wall_with_units units = wall.description_2d[0].vessel.unit[:5] - all_elements = units.element[:] + all_elements = units.element[:] assert len(all_elements) == 4 + 2 + 3 + 3 + 3 @@ -413,7 +413,7 @@ def test_values_integer_and_float_extraction(self): ion.neutral_index = i ion.z_ion = float(i + 1) - ions = cp.profiles_1d[:].ion[:] + ions = cp.profiles_1d[:].ion[:] indices = ions[:].neutral_index.values() assert all(isinstance(idx, (int, np.integer)) for idx in indices) @@ -452,7 +452,7 @@ def test_values_structure_preservation(self): for profile in cp.profiles_1d: profile.ion.resize(2) - ions = cp.profiles_1d[:].ion[:].values() + ions = cp.profiles_1d[:].ion[:].values() assert len(ions) == 6 for ion in ions: From e1502f18c956f7cc7c5fefe7c6a932d33a6aae21 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 21 Nov 2025 19:07:16 +0100 Subject: [PATCH 22/44] refactored code --- imas/ids_slice.py | 28 +++++++++------------------- imas/test/test_ids_slice.py | 28 ++-------------------------- 2 files changed, 11 insertions(+), 45 deletions(-) diff --git a/imas/ids_slice.py b/imas/ids_slice.py index 2e1f6a7..85f1b1d 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -61,26 +61,29 @@ def __iter__(self) -> Iterator[Any]: def __getitem__(self, item: Union[int, slice]) -> Union[Any, "IDSSlice"]: """Get element(s) from the slice. + When the matched elements are IDSStructArray objects, the indexing + operation is applied to each array element (array-wise indexing). + Otherwise, the operation is applied to the matched elements list itself. + Args: - item: Index or slice to apply to the matched elements + item: Index or slice to apply Returns: - A single element if item is an int, or an IDSSlice if item is a slice + - IDSSlice: If item is a slice, or if applying integer index to + IDSStructArray elements + - Single element: If item is an int and elements are not IDSStructArray """ from imas.ids_struct_array import IDSStructArray - # Check if matched elements are IDSStructArray - # If so, apply indexing to each array + # Array-wise indexing: apply operation to each IDSStructArray element if self._matched_elements and isinstance( self._matched_elements[0], IDSStructArray ): if isinstance(item, slice): - # Apply the slice to each array and collect all results sliced_elements = [] for array in self._matched_elements: sliced_elements.extend(list(array[item])) - # Build the slice path representation slice_str = self._format_slice(item) new_path = self._slice_path + slice_str @@ -90,12 +93,10 @@ def __getitem__(self, item: Union[int, slice]) -> Union[Any, "IDSSlice"]: new_path, ) else: - # Apply integer index to each array indexed_elements = [] for array in self._matched_elements: indexed_elements.append(array[item]) - # Build the index path representation new_path = self._slice_path + f"[{item}]" return IDSSlice( @@ -104,12 +105,8 @@ def __getitem__(self, item: Union[int, slice]) -> Union[Any, "IDSSlice"]: new_path, ) else: - # Normal slice behavior for non-array elements if isinstance(item, slice): - # Further slice the matched elements themselves sliced_elements = self._matched_elements[item] - - # Build the slice path representation slice_str = self._format_slice(item) new_path = self._slice_path + slice_str @@ -119,7 +116,6 @@ def __getitem__(self, item: Union[int, slice]) -> Union[Any, "IDSSlice"]: new_path, ) else: - # Return a single element by index return self._matched_elements[int(item)] def __getattr__(self, name: str) -> "IDSSlice": @@ -134,7 +130,6 @@ def __getattr__(self, name: str) -> "IDSSlice": Returns: A new IDSSlice containing the child attribute from each matched element """ - # Try to get child metadata if available child_metadata = None if self.metadata is not None: try: @@ -142,10 +137,7 @@ def __getattr__(self, name: str) -> "IDSSlice": except (KeyError, TypeError): pass - # Access the attribute on each element child_elements = [getattr(element, name) for element in self] - - # Build the new path including the attribute access new_path = self._slice_path + "." + name return IDSSlice( @@ -191,10 +183,8 @@ def values(self) -> List[Any]: result = [] for element in self._matched_elements: if isinstance(element, IDSPrimitive): - # Extract the wrapped value from IDSPrimitive result.append(element.value) else: - # Return other types as-is (structures, arrays, etc.) result.append(element) return result diff --git a/imas/test/test_ids_slice.py b/imas/test/test_ids_slice.py index 63bfaa8..643021a 100644 --- a/imas/test/test_ids_slice.py +++ b/imas/test/test_ids_slice.py @@ -35,7 +35,6 @@ def create_wall_with_units( units.resize(total_units) if element_counts is None: - # Ensure unit index 1 has fewer elements than unit 0 to trigger the corner case. element_counts = [4, 2] + [3] * (total_units - 2) element_counts = list(element_counts) @@ -190,25 +189,6 @@ def test_repr_count_display(self): assert "3 matches" in repr_str -class TestIDSSliceValidation: - - def test_validate_slice(self): - cp = IDSFactory("3.39.0").core_profiles() - cp.profiles_1d.resize(2) - cp.ids_properties.homogeneous_time = 1 - - slice_obj = cp.profiles_1d[:] - assert isinstance(slice_obj, IDSSlice) - - -class TestIDSSliceHash: - - def test_xxhash(self): - # _xxhash method removed from IDSSlice as it's not needed - # (IDSSlice is not an IDSBase subclass) - pass - - class TestWallExampleSlicing: def test_wall_units_nested_element_access(self, wall_with_units): @@ -352,12 +332,10 @@ def test_unit_slice_element_individual_access(self, wall_varying_sizes): units = wall_varying_sizes.description_2d[0].vessel.unit element_slice = units[:2].element - # With new behavior, element_slice[0] gets first element from each array first_from_each = element_slice[0] assert isinstance(first_from_each, IDSSlice) - assert len(first_from_each) == 2 # 2 units, so 2 first elements + assert len(first_from_each) == 2 - # To access individual arrays, use iteration arrays = list(element_slice) assert len(arrays[0]) == 4 assert arrays[0][2].name.value == "element-0-2" @@ -374,12 +352,10 @@ def test_wall_with_diverse_element_counts(self): units_slice = units[:3] element_slice = units_slice.element - # With new behavior, element_slice[0] gets first element from each array first_from_each = element_slice[0] assert isinstance(first_from_each, IDSSlice) - assert len(first_from_each) == 3 # 3 units + assert len(first_from_each) == 3 - # To access individual arrays, use iteration arrays = list(element_slice) assert len(arrays[0]) == 3 assert len(arrays[2]) == 4 From 8099741bad6e9d610c10c6a5a219b6883e4f3e5e Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 21 Nov 2025 19:12:42 +0100 Subject: [PATCH 23/44] updated documentation --- docs/source/array_slicing.rst | 30 ++++++++++++++---------- docs/source/courses/advanced/explore.rst | 4 ++-- docs/source/imas_architecture.rst | 4 ++-- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/docs/source/array_slicing.rst b/docs/source/array_slicing.rst index 122b566..67682f7 100644 --- a/docs/source/array_slicing.rst +++ b/docs/source/array_slicing.rst @@ -7,7 +7,7 @@ Key Difference --------------- - ``array[0]`` returns ``IDSStructure`` (single element) -- ``array[:]`` or ``array[1:5]`` returns ``IDSSlice`` (collection with ``flatten()`` and ``values()``) +- ``array[:]`` or ``array[1:5]`` returns ``IDSSlice`` (collection with ``values()`` method) Basic Usage ----------- @@ -27,8 +27,8 @@ Basic Usage subset = cp.profiles_1d[1:5] # IDSSlice every_other = cp.profiles_1d[::2] # IDSSlice - # Flatten nested arrays - all_ions = cp.profiles_1d[:].ion[:].flatten() # IDSSlice of individual ions + # Access nested arrays (automatic array-wise indexing) + all_ions = cp.profiles_1d[:].ion[:] # IDSSlice of individual ions # Extract values labels = all_ions.label.values() @@ -43,11 +43,11 @@ Common Patterns for element in cp.profiles_1d[5:10]: print(element.time) -**Flatten and iterate:** +**Iterate over nested arrays:** .. code-block:: python - for ion in cp.profiles_1d[:].ion[:].flatten(): + for ion in cp.profiles_1d[:].ion[:]: print(ion.label.value) **Get all values:** @@ -56,19 +56,23 @@ Common Patterns times = cp.profiles_1d[:].time.values() -Important Constraint --------------------- +Important: Array-wise Indexing +------------------------------- -When accessing attributes through a slice, all elements must have that attribute. -If elements are ``IDSStructArray`` objects, flatten first: +When accessing attributes through a slice of ``IDSStructArray`` elements, +the slice operation automatically applies to each array (array-wise indexing): .. code-block:: python - # Fails - IDSStructArray has no 'label' attribute - # cp.profiles_1d[:].ion[:].label + # Array-wise indexing: [:] applies to each ion array + all_ions = cp.profiles_1d[:].ion[:] + labels = all_ions.label.values() - # Correct - flatten first - labels = cp.profiles_1d[:].ion[:].flatten().label.values() + # Equivalent to manually iterating: + labels = [] + for profile in cp.profiles_1d[:]: + for ion in profile.ion: + labels.append(ion.label.value) Lazy-Loaded Arrays ------------------- diff --git a/docs/source/courses/advanced/explore.rst b/docs/source/courses/advanced/explore.rst index f16d254..02f1201 100644 --- a/docs/source/courses/advanced/explore.rst +++ b/docs/source/courses/advanced/explore.rst @@ -91,8 +91,8 @@ Arrays of structures support Python slice notation, which returns an subset = core_profiles.profiles_1d[2:5] # Elements 2, 3, 4 every_other = core_profiles.profiles_1d[::2] # Every second element - # IDSSlice supports flatten() and values() for convenient data access - all_ions = core_profiles.profiles_1d[:].ion[:].flatten() + # IDSSlice supports array-wise indexing and values() for data access + all_ions = core_profiles.profiles_1d[:].ion[:] for ion in all_ions: print(ion.label.value) diff --git a/docs/source/imas_architecture.rst b/docs/source/imas_architecture.rst index 6c61321..756d8f7 100644 --- a/docs/source/imas_architecture.rst +++ b/docs/source/imas_architecture.rst @@ -171,8 +171,8 @@ The following submodules and classes represent IDS nodes. - :py:mod:`imas.ids_slice` contains the :py:class:`~imas.ids_slice.IDSSlice` class, which represents a collection of IDS nodes matching a slice expression. It provides slicing operations on - :py:class:`~imas.ids_struct_array.IDSStructArray` elements and supports operations - like ``flatten()`` and ``values()`` for accessing matched elements. + :py:class:`~imas.ids_struct_array.IDSStructArray` elements with array-wise + indexing and supports the ``values()`` method for extracting raw data. - :py:mod:`imas.ids_structure` contains the :py:class:`~imas.ids_structure.IDSStructure` class, which models Structures. It From 21ded45d7033cf406dcde2add17478dfca82176e Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 21 Nov 2025 19:22:35 +0100 Subject: [PATCH 24/44] Fix as per review comment https://github.com/iterorganization/IMAS-Python/pull/20#discussion_r2522895820 --- imas/ids_struct_array.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/imas/ids_struct_array.py b/imas/ids_struct_array.py index 80587d5..a840703 100644 --- a/imas/ids_struct_array.py +++ b/imas/ids_struct_array.py @@ -130,20 +130,32 @@ def __getitem__(self, item): A single IDSStructure if item is an int, or an IDSSlice if item is a slice """ if isinstance(item, slice): - # Slicing is not supported on lazy-loaded arrays if self._lazy: - raise ValueError( - "Lazy-loaded IDSs do not support slicing. " - "Use integer indexing instead." + + self._load(None) # Load size + + # Convert slice to indices + start, stop, step = item.indices(len(self)) + + # Load only the elements in the slice range + loaded_elements = [] + for i in range(start, stop, step): + self._load(i) # Load each element on demand + loaded_elements.append(self.value[i]) + + from imas.ids_slice import IDSSlice + + slice_str = IDSSlice._format_slice(item) + + return IDSSlice( + self.metadata, + loaded_elements, + slice_str, ) - # Handle slice by returning an IDSSlice from imas.ids_slice import IDSSlice - # Get the matched elements matched_elements = self.value[item] - - # Build the slice path representation using IDSSlice's method slice_str = IDSSlice._format_slice(item) return IDSSlice( From e94d63448549d951f98b8b1d76f06cd017117263 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 21 Nov 2025 19:36:28 +0100 Subject: [PATCH 25/44] fix issue in sphinx docs --- imas/ids_slice.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/imas/ids_slice.py b/imas/ids_slice.py index 85f1b1d..ab9af3d 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -24,6 +24,9 @@ class IDSSlice: - Further slicing of child elements - Attribute access on all matched elements - Iteration over matched elements + + Attributes: + metadata: Metadata from the parent array, or None if not available """ __slots__ = ["metadata", "_matched_elements", "_slice_path"] From e448883a7b15f013c50985b3692fa44c54dc9bfb Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 21 Nov 2025 23:58:32 +0100 Subject: [PATCH 26/44] lazy loaded objects supports slicing --- docs/source/array_slicing.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/source/array_slicing.rst b/docs/source/array_slicing.rst index 67682f7..43e3294 100644 --- a/docs/source/array_slicing.rst +++ b/docs/source/array_slicing.rst @@ -77,9 +77,12 @@ the slice operation automatically applies to each array (array-wise indexing): Lazy-Loaded Arrays ------------------- -Individual indexing works with lazy loading, but slicing doesn't: +Both individual indexing and slicing work with lazy loading: .. code-block:: python element = lazy_array[0] # OK - loads on demand - subset = lazy_array[1:5] # ValueError + subset = lazy_array[1:5] # OK - loads only requested elements on demand + +When slicing lazy-loaded arrays, only the elements in the slice range are loaded, +making it memory-efficient for large datasets. From 32265d127077865093a8e2f51aebd1e392c6d06e Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Mon, 24 Nov 2025 17:19:45 +0100 Subject: [PATCH 27/44] fix representation issue and empty slice raises exception https://github.com/iterorganization/IMAS-Python/pull/20#issuecomment-3569735098 --- .gitignore | 2 +- imas/ids_slice.py | 30 +++++++++++++++++++++++------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 8bff5e8..7f88e5d 100644 --- a/.gitignore +++ b/.gitignore @@ -5,7 +5,7 @@ __pycache__/ # C extensions *.so - +myenv # Distribution / packaging .Python env/ diff --git a/imas/ids_slice.py b/imas/ids_slice.py index ab9af3d..40d09d0 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -22,7 +22,7 @@ class IDSSlice: an IDSSlice is returned. This allows for: - Tracking the slice operation in the path - Further slicing of child elements - - Attribute access on all matched elements + - Child node access on all matched elements - Iteration over matched elements Attributes: @@ -122,17 +122,22 @@ def __getitem__(self, item: Union[int, slice]) -> Union[Any, "IDSSlice"]: return self._matched_elements[int(item)] def __getattr__(self, name: str) -> "IDSSlice": - """Access a child attribute on all matched elements. + """Access a child node on all matched elements. - This returns a new IDSSlice containing the child attribute from + This returns a new IDSSlice containing the child node from each matched element. Args: - name: Name of the attribute to access + name: Name of the node to access Returns: - A new IDSSlice containing the child attribute from each matched element + A new IDSSlice containing the child node from each matched element """ + if not self._matched_elements: + raise IndexError( + f"Cannot access node '{name}' on empty slice with 0 elements" + ) + child_metadata = None if self.metadata is not None: try: @@ -152,8 +157,19 @@ def __getattr__(self, name: str) -> "IDSSlice": def __repr__(self) -> str: """Build a string representation of this slice.""" matches_count = len(self._matched_elements) - match_word = "match" if matches_count == 1 else "matches" - return f"" + match_word = "item" if matches_count == 1 else "items" + + array_name = self.metadata.name if self.metadata else "" + ids_name = "" + if self._matched_elements: + elem = self._matched_elements[0] + if hasattr(elem, "_toplevel") and hasattr(elem._toplevel, "metadata"): + ids_name = elem._toplevel.metadata.name + ids_prefix = f"IDS:{ids_name}, " if ids_name else "" + + return ( + f"" + ) def values(self) -> List[Any]: """Extract raw values from elements in this slice. From 00752d2ee94039e4beb4ce467df5fbc0357a8f3f Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 28 Nov 2025 10:00:20 +0100 Subject: [PATCH 28/44] fix first and second point fix __repr__ and Empty slices do not check if a getattr is valid-> https://github.com/iterorganization/IMAS-Python/pull/20#issuecomment-3569735098 --- imas/ids_slice.py | 53 +++++++++++++++++++++++++------------ imas/ids_struct_array.py | 2 ++ imas/test/test_ids_slice.py | 11 ++++---- 3 files changed, 44 insertions(+), 22 deletions(-) diff --git a/imas/ids_slice.py b/imas/ids_slice.py index 40d09d0..b874d2b 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -8,10 +8,13 @@ """ import logging -from typing import Any, Iterator, List, Optional, Union +from typing import TYPE_CHECKING, Any, Iterator, List, Optional, Union from imas.ids_metadata import IDSMetadata +if TYPE_CHECKING: + from imas.ids_struct_array import IDSStructArray + logger = logging.getLogger(__name__) @@ -29,13 +32,14 @@ class IDSSlice: metadata: Metadata from the parent array, or None if not available """ - __slots__ = ["metadata", "_matched_elements", "_slice_path"] + __slots__ = ["metadata", "_matched_elements", "_slice_path", "_parent_array"] def __init__( self, metadata: Optional[IDSMetadata], matched_elements: List[Any], slice_path: str, + parent_array: Optional["IDSStructArray"] = None, ): """Initialize IDSSlice. @@ -43,10 +47,12 @@ def __init__( metadata: Metadata from the parent array matched_elements: List of elements that matched the slice slice_path: String representation of the slice operation (e.g., "[8:]") + parent_array: Optional reference to the parent IDSStructArray for context """ self.metadata = metadata self._matched_elements = matched_elements self._slice_path = slice_path + self._parent_array = parent_array @property def _path(self) -> str: @@ -94,6 +100,7 @@ def __getitem__(self, item: Union[int, slice]) -> Union[Any, "IDSSlice"]: self.metadata, sliced_elements, new_path, + parent_array=self._parent_array, ) else: indexed_elements = [] @@ -106,6 +113,7 @@ def __getitem__(self, item: Union[int, slice]) -> Union[Any, "IDSSlice"]: self.metadata, indexed_elements, new_path, + parent_array=self._parent_array, ) else: if isinstance(item, slice): @@ -117,6 +125,7 @@ def __getitem__(self, item: Union[int, slice]) -> Union[Any, "IDSSlice"]: self.metadata, sliced_elements, new_path, + parent_array=self._parent_array, ) else: return self._matched_elements[int(item)] @@ -152,24 +161,34 @@ def __getattr__(self, name: str) -> "IDSSlice": child_metadata, child_elements, new_path, + parent_array=self._parent_array, ) def __repr__(self) -> str: - """Build a string representation of this slice.""" - matches_count = len(self._matched_elements) - match_word = "item" if matches_count == 1 else "items" - - array_name = self.metadata.name if self.metadata else "" - ids_name = "" - if self._matched_elements: - elem = self._matched_elements[0] - if hasattr(elem, "_toplevel") and hasattr(elem._toplevel, "metadata"): - ids_name = elem._toplevel.metadata.name - ids_prefix = f"IDS:{ids_name}, " if ids_name else "" - - return ( - f"" - ) + """Build a string representation of this IDSSlice. + + Returns a string showing: + - The IDS type name (e.g., 'equilibrium') + - The full path including the slice operation (e.g., 'time_slice[:]') + - The number of matched elements + + Returns: + String representation like below + like '' + """ + from imas.util import get_toplevel, get_full_path + + my_repr = f"<{type(self).__name__}" + ids_name = "unknown" + full_path = self._path + + if self._parent_array is not None: + ids_name = get_toplevel(self._parent_array).metadata.name + parent_array_path = get_full_path(self._parent_array) + full_path = parent_array_path + self._path + item_word = "item" if len(self) == 1 else "items" + my_repr += f" (IDS:{ids_name}, {full_path} with {len(self)} {item_word})>" + return my_repr def values(self) -> List[Any]: """Extract raw values from elements in this slice. diff --git a/imas/ids_struct_array.py b/imas/ids_struct_array.py index a840703..b06396b 100644 --- a/imas/ids_struct_array.py +++ b/imas/ids_struct_array.py @@ -151,6 +151,7 @@ def __getitem__(self, item): self.metadata, loaded_elements, slice_str, + parent_array=self, ) from imas.ids_slice import IDSSlice @@ -162,6 +163,7 @@ def __getitem__(self, item): self.metadata, matched_elements, slice_str, + parent_array=self, ) else: # Handle integer index diff --git a/imas/test/test_ids_slice.py b/imas/test/test_ids_slice.py index 643021a..9bb27e5 100644 --- a/imas/test/test_ids_slice.py +++ b/imas/test/test_ids_slice.py @@ -181,12 +181,12 @@ def test_repr_count_display(self): slice_obj = cp.profiles_1d[5:6] repr_str = repr(slice_obj) assert "IDSSlice" in repr_str - assert "1 match" in repr_str + assert "1 item" in repr_str slice_obj = cp.profiles_1d[5:8] repr_str = repr(slice_obj) assert "IDSSlice" in repr_str - assert "3 matches" in repr_str + assert "3 items" in repr_str class TestWallExampleSlicing: @@ -291,7 +291,7 @@ def test_flatten_repr_and_path(self): repr_str = repr(flattened) assert "IDSSlice" in repr_str - assert "4 matches" in repr_str + assert "4 items" in repr_str assert "[:]" in flattened._path def test_flatten_complex_case(self, wall_with_units): @@ -406,8 +406,9 @@ def test_values_partial_and_empty_slices(self, wall_with_units): cp = IDSFactory("3.39.0").core_profiles() cp.profiles_1d.resize(5) - empty_values = cp.profiles_1d[5:10].label.values() - assert len(empty_values) == 0 + # Empty slices should raise IndexError when accessing attributes + with pytest.raises(IndexError): + cp.profiles_1d[5:10].label.values() def test_values_with_step_and_negative_indices(self, wall_with_units): wall = wall_with_units From 93bdb50ed56d5eff00eaa40e083d71a0e9e2cc6e Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 28 Nov 2025 13:58:14 +0100 Subject: [PATCH 29/44] fix first and second point fix __repr__ and Empty slices do not check if a getattr is valid-> https://github.com/iterorganization/IMAS-Python/pull/20#issuecomment-3569735098 --- imas/ids_slice.py | 343 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 319 insertions(+), 24 deletions(-) diff --git a/imas/ids_slice.py b/imas/ids_slice.py index b874d2b..83342a4 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -8,7 +8,9 @@ """ import logging -from typing import TYPE_CHECKING, Any, Iterator, List, Optional, Union +from typing import TYPE_CHECKING, Any, Iterator, List, Optional, Tuple, Union + +import numpy as np from imas.ids_metadata import IDSMetadata @@ -32,7 +34,14 @@ class IDSSlice: metadata: Metadata from the parent array, or None if not available """ - __slots__ = ["metadata", "_matched_elements", "_slice_path", "_parent_array"] + __slots__ = [ + "metadata", + "_matched_elements", + "_slice_path", + "_parent_array", + "_virtual_shape", + "_element_hierarchy", + ] def __init__( self, @@ -40,6 +49,8 @@ def __init__( matched_elements: List[Any], slice_path: str, parent_array: Optional["IDSStructArray"] = None, + virtual_shape: Optional[Tuple[int, ...]] = None, + element_hierarchy: Optional[List[Any]] = None, ): """Initialize IDSSlice. @@ -48,17 +59,33 @@ def __init__( matched_elements: List of elements that matched the slice slice_path: String representation of the slice operation (e.g., "[8:]") parent_array: Optional reference to the parent IDSStructArray for context + virtual_shape: Optional tuple representing multi-dimensional shape + element_hierarchy: Optional tracking of element grouping """ self.metadata = metadata self._matched_elements = matched_elements self._slice_path = slice_path self._parent_array = parent_array + self._virtual_shape = virtual_shape or (len(matched_elements),) + self._element_hierarchy = element_hierarchy or [len(matched_elements)] @property def _path(self) -> str: """Return the path representation of this slice.""" return self._slice_path + @property + def shape(self) -> Tuple[int, ...]: + """Get the virtual multi-dimensional shape. + + Returns the shape of the data as if it were organized in a multi-dimensional + array, based on the hierarchy of slicing operations performed. + + Returns: + Tuple of dimensions. Use with caution for jagged arrays where sizes vary. + """ + return self._virtual_shape + def __len__(self) -> int: """Return the number of elements matched by this slice.""" return len(self._matched_elements) @@ -89,31 +116,53 @@ def __getitem__(self, item: Union[int, slice]) -> Union[Any, "IDSSlice"]: self._matched_elements[0], IDSStructArray ): if isinstance(item, slice): + # NEW: Preserve structure instead of flattening sliced_elements = [] + sliced_sizes = [] + for array in self._matched_elements: - sliced_elements.extend(list(array[item])) + sliced = array[item] + if isinstance(sliced, IDSSlice): + sliced_elements.extend(sliced._matched_elements) + sliced_sizes.append(len(sliced)) + else: + sliced_elements.append(sliced) + sliced_sizes.append(1) slice_str = self._format_slice(item) new_path = self._slice_path + slice_str + # NEW: Update shape to reflect the sliced structure + # Keep first dimensions, update last dimension + new_virtual_shape = self._virtual_shape[:-1] + (sliced_sizes[0] if sliced_sizes else 0,) + new_hierarchy = self._element_hierarchy[:-1] + [sliced_sizes] + return IDSSlice( self.metadata, sliced_elements, new_path, parent_array=self._parent_array, + virtual_shape=new_virtual_shape, + element_hierarchy=new_hierarchy, ) else: + # Integer indexing on arrays indexed_elements = [] for array in self._matched_elements: - indexed_elements.append(array[item]) + indexed_elements.append(array[int(item)]) new_path = self._slice_path + f"[{item}]" + # Shape changes: last dimension becomes 1 + new_virtual_shape = self._virtual_shape[:-1] + (1,) + return IDSSlice( self.metadata, indexed_elements, new_path, parent_array=self._parent_array, + virtual_shape=new_virtual_shape, + element_hierarchy=self._element_hierarchy, ) else: if isinstance(item, slice): @@ -121,11 +170,17 @@ def __getitem__(self, item: Union[int, slice]) -> Union[Any, "IDSSlice"]: slice_str = self._format_slice(item) new_path = self._slice_path + slice_str + # NEW: Update shape to reflect the slice on first dimension + new_virtual_shape = (len(sliced_elements),) + self._virtual_shape[1:] + new_element_hierarchy = [len(sliced_elements)] + self._element_hierarchy[1:] + return IDSSlice( self.metadata, sliced_elements, new_path, parent_array=self._parent_array, + virtual_shape=new_virtual_shape, + element_hierarchy=new_element_hierarchy, ) else: return self._matched_elements[int(item)] @@ -134,7 +189,8 @@ def __getattr__(self, name: str) -> "IDSSlice": """Access a child node on all matched elements. This returns a new IDSSlice containing the child node from - each matched element. + each matched element. Preserves multi-dimensional structure + when child elements are arrays. Args: name: Name of the node to access @@ -147,6 +203,8 @@ def __getattr__(self, name: str) -> "IDSSlice": f"Cannot access node '{name}' on empty slice with 0 elements" ) + from imas.ids_struct_array import IDSStructArray + child_metadata = None if self.metadata is not None: try: @@ -157,12 +215,65 @@ def __getattr__(self, name: str) -> "IDSSlice": child_elements = [getattr(element, name) for element in self] new_path = self._slice_path + "." + name - return IDSSlice( - child_metadata, - child_elements, - new_path, - parent_array=self._parent_array, - ) + # Check if children are IDSStructArray (nested arrays) or IDSNumericArray + if not child_elements: + # Empty slice + return IDSSlice( + child_metadata, + child_elements, + new_path, + parent_array=self._parent_array, + virtual_shape=self._virtual_shape, + element_hierarchy=self._element_hierarchy, + ) + + from imas.ids_primitive import IDSNumericArray + + if isinstance(child_elements[0], IDSStructArray): + # Children are IDSStructArray - track the new dimension + child_sizes = [len(arr) for arr in child_elements] + + # New virtual shape: current shape + new dimension + new_virtual_shape = self._virtual_shape + (child_sizes[0] if child_sizes else 0,) + new_hierarchy = self._element_hierarchy + [child_sizes] + + return IDSSlice( + child_metadata, + child_elements, + new_path, + parent_array=self._parent_array, + virtual_shape=new_virtual_shape, + element_hierarchy=new_hierarchy, + ) + elif isinstance(child_elements[0], IDSNumericArray): + # Children are IDSNumericArray - track the array dimension + # Each IDSNumericArray has a size (length of its data) + child_sizes = [len(arr) for arr in child_elements] + + # New virtual shape: current shape + new dimension (size of first numeric array) + # Note: For now we assume all have same size (regular array) + # Jagged arrays handled by to_array() with object dtype + new_virtual_shape = self._virtual_shape + (child_sizes[0] if child_sizes else 0,) + new_hierarchy = self._element_hierarchy + [child_sizes] + + return IDSSlice( + child_metadata, + child_elements, + new_path, + parent_array=self._parent_array, + virtual_shape=new_virtual_shape, + element_hierarchy=new_hierarchy, + ) + else: + # Children are not arrays (structures or other primitives) + return IDSSlice( + child_metadata, + child_elements, + new_path, + parent_array=self._parent_array, + virtual_shape=self._virtual_shape, + element_hierarchy=self._element_hierarchy, + ) def __repr__(self) -> str: """Build a string representation of this IDSSlice. @@ -190,41 +301,225 @@ def __repr__(self) -> str: my_repr += f" (IDS:{ids_name}, {full_path} with {len(self)} {item_word})>" return my_repr - def values(self) -> List[Any]: + def values(self, reshape: bool = False) -> Any: """Extract raw values from elements in this slice. For IDSPrimitive elements, this extracts the wrapped value. For other element types, returns them as-is. + For multi-dimensional slices (when shape has multiple dimensions), + this extracts values respecting the multi-dimensional structure. + This is useful for getting the actual data without the IDS wrapper when accessing scalar fields through a slice, without requiring explicit looping through the original collection. + Args: + reshape: If True, reshape result to match self.shape for + multi-dimensional slices. If False (default), return flat list + or list of extracted values. + Returns: - List of raw Python/numpy values or other unwrapped elements + For 1D: List of raw Python/numpy values or unwrapped elements + For multi-D with reshape=False: List of elements (each being an array) + For multi-D with reshape=True: numpy.ndarray with shape self.shape, + or nested lists/object array representing structure Examples: >>> # Get names from identifiers without looping >>> n = edge_profiles.grid_ggd[0].grid_subset[:].identifier.name.values() >>> # Result: ["nodes", "edges", "cells"] >>> - >>> # Works with any scalar or array type - >>> i = edge_profiles.grid_ggd[0].grid_subset[:].identifier.index.values() - >>> # Result: [1, 2, 5] + >>> # Get 2D array but as list of arrays (default) + >>> rho = core_profiles.profiles_1d[:].grid.rho_tor.values() + >>> # Result: [ndarray(100,), ndarray(100,), ...] - list of 106 arrays >>> - >>> # Still works with structures (returns unwrapped) - >>> ions = profiles[:].ion.values() - >>> # Result: [IDSStructure(...), IDSStructure(...), ...] + >>> # Get 2D array reshaped to (106, 100) + >>> rho = core_profiles.profiles_1d[:].grid.rho_tor.values(reshape=True) + >>> # Result: ndarray shape (106, 100) + >>> + >>> # 3D ions case - returns object array with structure + >>> ion_rho = core_profiles.profiles_1d[:].ion[:].element[:].density.values(reshape=True) + >>> # Result: object array shape (106, 3, 2) with IDSNumericArray elements """ - from imas.ids_primitive import IDSPrimitive + from imas.ids_primitive import IDSPrimitive, IDSNumericArray + + # Default behavior: return flat list without reshape + if not reshape: + result = [] + for element in self._matched_elements: + if isinstance(element, IDSPrimitive): + result.append(element.value) + else: + result.append(element) + return result + + # Multi-dimensional case with reshape requested + flat_values = [] + for element in self._matched_elements: + if isinstance(element, IDSPrimitive): + flat_values.append(element.value) + elif isinstance(element, IDSNumericArray): + flat_values.append(element.data if hasattr(element, 'data') else element.value) + else: + flat_values.append(element) + + # For 1D, just return as is + if len(self._virtual_shape) == 1: + return flat_values + + # Try to reshape to multi-dimensional shape + try: + # Calculate total size + total_size = 1 + for dim in self._virtual_shape: + total_size *= dim + + # Check if sizes match + if len(flat_values) == total_size: + # Successfully reshape to multi-dimensional + return np.array(flat_values, dtype=object).reshape(self._virtual_shape) + except (ValueError, TypeError): + pass + + # If reshape fails or not all elements are extractable, return as object array + try: + return np.array(flat_values, dtype=object).reshape(self._virtual_shape[0:1]) + except (ValueError, TypeError): + return flat_values + + def to_array(self) -> np.ndarray: + """Convert this slice to a numpy array respecting multi-dimensional structure. + + For 1D slices, returns a simple 1D array. + For multi-dimensional slices, returns an array with shape self.shape. + For jagged arrays (varying sizes in lower dimensions), returns an object array. + + This is useful for integration with numpy operations, scipy functions, + and xarray data structures. The returned array preserves the hierarchical + structure of the IMAS data. - result = [] + Returns: + numpy.ndarray with shape self.shape. For jagged arrays, dtype will be object. + + Raises: + ValueError: If array cannot be converted to numpy + + Examples: + >>> # Convert 2D slice to numpy array + >>> rho_array = core_profiles.profiles_1d[:].grid.rho_tor.to_array() + >>> # Result: ndarray shape (106, 100), dtype float64 + >>> print(rho_array.shape) + (106, 100) + >>> + >>> # Jagged array returns object array + >>> ion_density = core_profiles.profiles_1d[:].ion[:].density.to_array() + >>> # Result: object array shape (106, 3) with varying sizes + >>> + >>> # Can be used directly with numpy functions + >>> mean_rho = np.mean(rho_array, axis=1) + >>> # Result: (106,) array of mean values + """ + from imas.ids_primitive import IDSPrimitive, IDSNumericArray + + # 1D case - simple conversion + if len(self._virtual_shape) == 1: + flat_values = [] + for element in self._matched_elements: + if isinstance(element, IDSPrimitive): + flat_values.append(element.value) + else: + flat_values.append(element) + try: + return np.array(flat_values) + except (ValueError, TypeError): + return np.array(flat_values, dtype=object) + + # Multi-dimensional case + # Check if matched elements are themselves arrays (IDSNumericArray) + if self._matched_elements and isinstance(self._matched_elements[0], IDSNumericArray): + # Elements are numeric arrays - extract their values and stack them + array_values = [] + for element in self._matched_elements: + if isinstance(element, IDSNumericArray): + array_values.append(element.value) + else: + array_values.append(element) + + # Try to stack into proper shape + try: + # Check if all arrays have the same size (regular) + sizes = [] + for val in array_values: + if hasattr(val, '__len__'): + sizes.append(len(val)) + else: + sizes.append(1) + + # If all sizes are the same, we can create a regular array + if len(set(sizes)) == 1: + # Regular array - all sub-arrays same size + stacked = np.array(array_values) + # Should now have shape (first_dim, second_dim) + if stacked.shape == self._virtual_shape: + return stacked + else: + # Try explicit reshape + try: + return stacked.reshape(self._virtual_shape) + except ValueError: + # If reshape fails, return as object array + result_arr = np.empty(self._virtual_shape, dtype=object) + for i, val in enumerate(array_values): + result_arr.flat[i] = val + return result_arr + else: + # Jagged array - different sizes + result_arr = np.empty(self._virtual_shape[0], dtype=object) + for i, val in enumerate(array_values): + result_arr[i] = val + return result_arr + except (ValueError, TypeError) as e: + # Fallback: return object array + result_arr = np.empty(self._virtual_shape[0], dtype=object) + for i, val in enumerate(array_values): + result_arr[i] = val + return result_arr + + # For non-numeric elements in multi-dimensional structure + # Extract and try to build structure + flat_values = [] for element in self._matched_elements: if isinstance(element, IDSPrimitive): - result.append(element.value) + flat_values.append(element.value) else: - result.append(element) - return result + flat_values.append(element) + + total_size = 1 + for dim in self._virtual_shape: + total_size *= dim + + # Check if we have the right number of elements + if len(flat_values) != total_size: + raise ValueError( + f"Cannot convert to array: expected {total_size} elements " + f"but got {len(flat_values)}" + ) + + # Try to create the array + try: + arr = np.array(flat_values) + try: + # Try to reshape to target shape + return arr.reshape(self._virtual_shape) + except (ValueError, TypeError): + # If reshape fails, use object array + arr_obj = np.empty(self._virtual_shape, dtype=object) + for i, val in enumerate(flat_values): + arr_obj.flat[i] = val + return arr_obj + except (ValueError, TypeError) as e: + raise ValueError(f"Failed to convert slice to numpy array: {e}") @staticmethod def _format_slice(slice_obj: slice) -> str: From 757af7586b4e4b76f4d13d2b4c8c6c16a31c1c9d Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 28 Nov 2025 17:41:07 +0100 Subject: [PATCH 30/44] added tests --- imas/ids_slice.py | 7 +- imas/test/test_multidim_slicing.py | 357 +++++++++++++++++++++++++++++ 2 files changed, 360 insertions(+), 4 deletions(-) create mode 100644 imas/test/test_multidim_slicing.py diff --git a/imas/ids_slice.py b/imas/ids_slice.py index 83342a4..09b8d69 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -116,7 +116,7 @@ def __getitem__(self, item: Union[int, slice]) -> Union[Any, "IDSSlice"]: self._matched_elements[0], IDSStructArray ): if isinstance(item, slice): - # NEW: Preserve structure instead of flattening + # Preserve structure instead of flattening sliced_elements = [] sliced_sizes = [] @@ -132,7 +132,7 @@ def __getitem__(self, item: Union[int, slice]) -> Union[Any, "IDSSlice"]: slice_str = self._format_slice(item) new_path = self._slice_path + slice_str - # NEW: Update shape to reflect the sliced structure + # Update shape to reflect the sliced structure # Keep first dimensions, update last dimension new_virtual_shape = self._virtual_shape[:-1] + (sliced_sizes[0] if sliced_sizes else 0,) new_hierarchy = self._element_hierarchy[:-1] + [sliced_sizes] @@ -170,7 +170,7 @@ def __getitem__(self, item: Union[int, slice]) -> Union[Any, "IDSSlice"]: slice_str = self._format_slice(item) new_path = self._slice_path + slice_str - # NEW: Update shape to reflect the slice on first dimension + # Update shape to reflect the slice on first dimension new_virtual_shape = (len(sliced_elements),) + self._virtual_shape[1:] new_element_hierarchy = [len(sliced_elements)] + self._element_hierarchy[1:] @@ -251,7 +251,6 @@ def __getattr__(self, name: str) -> "IDSSlice": child_sizes = [len(arr) for arr in child_elements] # New virtual shape: current shape + new dimension (size of first numeric array) - # Note: For now we assume all have same size (regular array) # Jagged arrays handled by to_array() with object dtype new_virtual_shape = self._virtual_shape + (child_sizes[0] if child_sizes else 0,) new_hierarchy = self._element_hierarchy + [child_sizes] diff --git a/imas/test/test_multidim_slicing.py b/imas/test/test_multidim_slicing.py new file mode 100644 index 0000000..b0c908b --- /dev/null +++ b/imas/test/test_multidim_slicing.py @@ -0,0 +1,357 @@ +# This file is part of IMAS-Python. +# You should have received the IMAS-Python LICENSE file with this project. +"""Tests for multi-dimensional slicing support in IDSSlice.""" + +import numpy as np +import pytest + +from imas.ids_factory import IDSFactory +from imas.ids_slice import IDSSlice + + +class TestMultiDimSlicing: + """Shape tracking and conversion methods.""" + + def test_shape_property_single_level(self): + """Test shape property for single-level slice.""" + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(10) + + result = cp.profiles_1d[:] + assert hasattr(result, "shape") + assert result.shape == (10,) + + def test_shape_property_two_level(self): + """Test shape property for 2D array access.""" + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(5) + for p in cp.profiles_1d: + p.grid.rho_tor_norm = np.array([0.0, 0.5, 1.0]) + + result = cp.profiles_1d[:].grid.rho_tor_norm + assert result.shape == (5, 3) + + def test_shape_property_three_level(self): + """Test shape property for 3D nested structure.""" + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(3) + for p in cp.profiles_1d: + p.ion.resize(2) + for i in p.ion: + i.element.resize(2) + + result = cp.profiles_1d[:].ion[:].element[:] + assert result.shape == (3, 2, 2) + + def test_to_array_2d_regular(self): + """Test to_array() with regular 2D array.""" + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(5) + for i, p in enumerate(cp.profiles_1d): + p.grid.rho_tor_norm = np.array([0.0, 0.5, 1.0]) + + result = cp.profiles_1d[:].grid.rho_tor_norm + array = result.to_array() + + assert isinstance(array, np.ndarray) + assert array.shape == (5, 3) + assert np.allclose(array[0], [0.0, 0.5, 1.0]) + assert np.allclose(array[4], [0.0, 0.5, 1.0]) + + def test_to_array_3d_regular(self): + """Test to_array() with regular 3D array.""" + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(3) + for p in cp.profiles_1d: + p.ion.resize(2) + for i_idx, i in enumerate(p.ion): + i.element.resize(2) + for e_idx, e in enumerate(i.element): + e.z_n = float(e_idx) + + result = cp.profiles_1d[:].ion[:].element[:].z_n + array = result.to_array() + + assert isinstance(array, np.ndarray) + assert array.shape == (3, 2, 2) + assert np.allclose(array[0, 0, :], [0.0, 1.0]) + assert np.allclose(array[0, 1, :], [0.0, 1.0]) + + def test_to_array_jagged(self): + """Test to_array() with jagged (variable-size) arrays.""" + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(3) + cp.profiles_1d[0].grid.rho_tor_norm = np.array([0.0, 0.5, 1.0]) + cp.profiles_1d[1].grid.rho_tor_norm = np.array([0.0, 0.25, 0.5, 0.75, 1.0]) + cp.profiles_1d[2].grid.rho_tor_norm = np.array([0.0, 0.5, 1.0]) + + result = cp.profiles_1d[:].grid.rho_tor_norm + array = result.to_array() + + assert array.dtype == object + assert len(array) == 3 + assert len(array[0]) == 3 + assert len(array[1]) == 5 + assert len(array[2]) == 3 + + def test_enhanced_values_2d(self): + """Test enhanced values() method for 2D extraction.""" + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(3) + for p in cp.profiles_1d: + p.grid.rho_tor_norm = np.array([0.0, 0.5, 1.0]) + + result = cp.profiles_1d[:].grid.rho_tor_norm + values = result.values() + + # Should be a list of 3 arrays + assert isinstance(values, list) + assert len(values) == 3 + for v in values: + assert isinstance(v, np.ndarray) + assert len(v) == 3 + + def test_enhanced_values_3d(self): + """Test enhanced values() method for 3D extraction.""" + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(2) + for p in cp.profiles_1d: + p.ion.resize(2) + for i in p.ion: + i.element.resize(2) + for e_idx, e in enumerate(i.element): + e.z_n = float(e_idx) + + result = cp.profiles_1d[:].ion[:].element[:].z_n + values = result.values() + + assert isinstance(values, list) + assert len(values) == 8 # 2 profiles * 2 ions * 2 elements + + def test_slice_preserves_groups(self): + """Test that slicing preserves group structure.""" + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(10) + for p in cp.profiles_1d: + p.ion.resize(3) + + # Get all ions, then slice + result = cp.profiles_1d[:].ion[:] + + # Should still know the structure: 10 profiles, 3 ions each + assert result.shape == (10, 3) + assert len(result) == 30 # Flattened for iteration, but shape preserved + + def test_integer_index_on_nested(self): + """Test integer indexing on nested structures.""" + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(5) + for i, p in enumerate(cp.profiles_1d): + p.ion.resize(2) + for j, ion in enumerate(p.ion): + ion.label = f"ion_{i}_{j}" + + # Get first ion from all profiles + result = cp.profiles_1d[:].ion[0] + + assert len(result) == 5 + for i, ion in enumerate(result): + assert ion.label == f"ion_{i}_0" + + def test_slice_on_nested_arrays(self): + """Test slicing on nested arrays.""" + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(5) + for p in cp.profiles_1d: + p.ion.resize(4) + + # Get first 2 ions from each profile + result = cp.profiles_1d[:].ion[:2] + + assert result.shape == (5, 2) + assert len(result) == 10 # 5 profiles * 2 ions each + + def test_step_slicing_on_nested(self): + """Test step slicing on nested structures.""" + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(5) + for p in cp.profiles_1d: + p.ion.resize(6) + + # Get every other ion + result = cp.profiles_1d[:].ion[::2] + + assert result.shape == (5, 3) # 5 profiles, 3 ions each (0, 2, 4) + assert len(result) == 15 + + def test_negative_indexing_on_nested(self): + """Test negative indexing on nested structures.""" + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(5) + for p in cp.profiles_1d: + p.ion.resize(3) + for j, ion in enumerate(p.ion): + ion.label = f"ion_{j}" + + # Get last ion from each profile + result = cp.profiles_1d[:].ion[-1] + + assert len(result) == 5 + for ion in result: + assert ion.label == "ion_2" + + def test_to_array_grouped_structure(self): + """Test that to_array preserves grouped structure.""" + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(3) + for p_idx, p in enumerate(cp.profiles_1d): + p.ion.resize(2) + for i_idx, i in enumerate(p.ion): + i.z_ion = float(p_idx * 10 + i_idx) + + result = cp.profiles_1d[:].ion[:].z_ion + array = result.to_array() + + # Should be (3, 2) array + assert array.shape == (3, 2) + assert array[0, 0] == 0.0 + assert array[1, 0] == 10.0 + assert array[2, 1] == 21.0 + + @pytest.mark.skip(reason="Phase 3 feature - boolean indexing not yet implemented") + def test_boolean_indexing_simple(self): + """Test boolean indexing on slices.""" + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(5) + for i, p in enumerate(cp.profiles_1d): + p.electrons.density = np.array([float(i)] * 5) + + result = cp.profiles_1d[:].electrons.density + + mask = np.array([True, False, True, False, True]) + filtered = result[mask] + assert len(filtered) == 3 + + def test_assignment_on_slice(self): + """Test assignment through slices.""" + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(3) + for p in cp.profiles_1d: + p.grid.rho_tor_norm = np.array([0.0, 0.5, 1.0]) + + # Assign new values through slice + new_values = np.array([[0.1, 0.6, 1.1], + [0.2, 0.7, 1.2], + [0.3, 0.8, 1.3]]) + + # This requires assignment support + # cp.profiles_1d[:].grid.rho_tor_norm[:] = new_values + # For now, verify slicing works for reading + + result = cp.profiles_1d[:].grid.rho_tor_norm + array = result.to_array() + assert array.shape == (3, 3) + + def test_xarray_integration_compatible(self): + """Test that output is compatible with xarray.""" + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(3) + cp.time = np.array([1.0, 2.0, 3.0]) + + for i, p in enumerate(cp.profiles_1d): + p.grid.rho_tor_norm = np.array([0.0, 0.5, 1.0]) + p.electrons.temperature = np.array([1.0, 2.0, 3.0]) * (i + 1) + + # Test that we can extract values in xarray-compatible format + temps = cp.profiles_1d[:].electrons.temperature.to_array() + times = cp.time + + assert temps.shape == (3, 3) + assert len(times) == 3 + + def test_performance_large_hierarchy(self): + """Test performance with large nested hierarchies.""" + cp = IDSFactory("3.39.0").core_profiles() + n_profiles = 50 + cp.profiles_1d.resize(n_profiles) + + for p in cp.profiles_1d: + p.grid.rho_tor_norm = np.linspace(0, 1, 100) + p.ion.resize(5) + for i in p.ion: + i.element.resize(3) + + # Should handle large data without significant slowdown + result = cp.profiles_1d[:].grid.rho_tor_norm + array = result.to_array() + + assert array.shape == (n_profiles, 100) + + def test_lazy_loading_with_multidim(self): + """Test that lazy loading works with multi-dimensional slicing.""" + # This would require a database, so we'll test with in-memory + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(5) + for p in cp.profiles_1d: + p.grid.rho_tor_norm = np.array([0.0, 0.5, 1.0]) + + result = cp.profiles_1d[:].grid.rho_tor_norm + + # Verify lazy attributes are preserved + assert hasattr(result, '_lazy') + assert hasattr(result, '_parent_array') + + + + +class TestEdgeCases: + """Test edge cases and error conditions.""" + + def test_empty_slice(self): + """Test slicing that results in empty arrays.""" + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(5) + for p in cp.profiles_1d: + p.ion.resize(0) + + result = cp.profiles_1d[:].ion + assert len(result) == 5 + for ions in result: + # Each should be empty + pass + + def test_single_element_2d(self): + """Test 2D extraction with single element.""" + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(1) + cp.profiles_1d[0].grid.rho_tor_norm = np.array([0.0, 0.5, 1.0]) + + result = cp.profiles_1d[:].grid.rho_tor_norm + assert result.shape == (1, 3) + + def test_single_dimension_value(self): + """Test accessing a single value in multi-dimensional structure.""" + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(3) + for p in cp.profiles_1d: + p.ion.resize(2) + for i in p.ion: + i.z_ion = 1.0 + + result = cp.profiles_1d[:].ion[0].z_ion + + # Should be 3 items (one per profile) + assert len(result) == 3 + + def test_slice_of_slice(self): + """Test slicing a slice.""" + cp = IDSFactory("3.39.0").core_profiles() + cp.profiles_1d.resize(10) + for p in cp.profiles_1d: + p.ion.resize(3) + + result1 = cp.profiles_1d[::2].ion # Every other profile's ions + assert result1.shape == (5, 3) + + result2 = result1[:2] # First 2 from each + assert result2.shape == (5, 2) From d862073428b4b773af44c4f668b93703fcfc6e62 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 28 Nov 2025 18:37:19 +0100 Subject: [PATCH 31/44] fixed issues with to_array function --- imas/ids_slice.py | 85 +++++++++++++++++++++--------- imas/test/test_multidim_slicing.py | 38 ++++++------- 2 files changed, 75 insertions(+), 48 deletions(-) diff --git a/imas/ids_slice.py b/imas/ids_slice.py index 09b8d69..9e51ac3 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -77,10 +77,10 @@ def _path(self) -> str: @property def shape(self) -> Tuple[int, ...]: """Get the virtual multi-dimensional shape. - + Returns the shape of the data as if it were organized in a multi-dimensional array, based on the hierarchy of slicing operations performed. - + Returns: Tuple of dimensions. Use with caution for jagged arrays where sizes vary. """ @@ -119,7 +119,7 @@ def __getitem__(self, item: Union[int, slice]) -> Union[Any, "IDSSlice"]: # Preserve structure instead of flattening sliced_elements = [] sliced_sizes = [] - + for array in self._matched_elements: sliced = array[item] if isinstance(sliced, IDSSlice): @@ -134,7 +134,9 @@ def __getitem__(self, item: Union[int, slice]) -> Union[Any, "IDSSlice"]: # Update shape to reflect the sliced structure # Keep first dimensions, update last dimension - new_virtual_shape = self._virtual_shape[:-1] + (sliced_sizes[0] if sliced_sizes else 0,) + new_virtual_shape = self._virtual_shape[:-1] + ( + sliced_sizes[0] if sliced_sizes else 0, + ) new_hierarchy = self._element_hierarchy[:-1] + [sliced_sizes] return IDSSlice( @@ -172,7 +174,9 @@ def __getitem__(self, item: Union[int, slice]) -> Union[Any, "IDSSlice"]: # Update shape to reflect the slice on first dimension new_virtual_shape = (len(sliced_elements),) + self._virtual_shape[1:] - new_element_hierarchy = [len(sliced_elements)] + self._element_hierarchy[1:] + new_element_hierarchy = [ + len(sliced_elements) + ] + self._element_hierarchy[1:] return IDSSlice( self.metadata, @@ -232,11 +236,13 @@ def __getattr__(self, name: str) -> "IDSSlice": if isinstance(child_elements[0], IDSStructArray): # Children are IDSStructArray - track the new dimension child_sizes = [len(arr) for arr in child_elements] - + # New virtual shape: current shape + new dimension - new_virtual_shape = self._virtual_shape + (child_sizes[0] if child_sizes else 0,) + new_virtual_shape = self._virtual_shape + ( + child_sizes[0] if child_sizes else 0, + ) new_hierarchy = self._element_hierarchy + [child_sizes] - + return IDSSlice( child_metadata, child_elements, @@ -249,12 +255,14 @@ def __getattr__(self, name: str) -> "IDSSlice": # Children are IDSNumericArray - track the array dimension # Each IDSNumericArray has a size (length of its data) child_sizes = [len(arr) for arr in child_elements] - - # New virtual shape: current shape + new dimension (size of first numeric array) + + # New virtual shape: current shape + new dimension # Jagged arrays handled by to_array() with object dtype - new_virtual_shape = self._virtual_shape + (child_sizes[0] if child_sizes else 0,) + new_virtual_shape = self._virtual_shape + ( + child_sizes[0] if child_sizes else 0, + ) new_hierarchy = self._element_hierarchy + [child_sizes] - + return IDSSlice( child_metadata, child_elements, @@ -338,7 +346,11 @@ def values(self, reshape: bool = False) -> Any: >>> # Result: ndarray shape (106, 100) >>> >>> # 3D ions case - returns object array with structure - >>> ion_rho = core_profiles.profiles_1d[:].ion[:].element[:].density.values(reshape=True) + >>> ion_rho = ( + ... core_profiles.profiles_1d[:].ion[:].element[:].density.values( + ... reshape=True + ... ) + ... ) >>> # Result: object array shape (106, 3, 2) with IDSNumericArray elements """ from imas.ids_primitive import IDSPrimitive, IDSNumericArray @@ -359,7 +371,9 @@ def values(self, reshape: bool = False) -> Any: if isinstance(element, IDSPrimitive): flat_values.append(element.value) elif isinstance(element, IDSNumericArray): - flat_values.append(element.data if hasattr(element, 'data') else element.value) + flat_values.append( + element.data if hasattr(element, "data") else element.value + ) else: flat_values.append(element) @@ -399,7 +413,8 @@ def to_array(self) -> np.ndarray: structure of the IMAS data. Returns: - numpy.ndarray with shape self.shape. For jagged arrays, dtype will be object. + numpy.ndarray with shape self.shape. For jagged arrays, + dtype will be object. Raises: ValueError: If array cannot be converted to numpy @@ -436,7 +451,9 @@ def to_array(self) -> np.ndarray: # Multi-dimensional case # Check if matched elements are themselves arrays (IDSNumericArray) - if self._matched_elements and isinstance(self._matched_elements[0], IDSNumericArray): + if self._matched_elements and isinstance( + self._matched_elements[0], IDSNumericArray + ): # Elements are numeric arrays - extract their values and stack them array_values = [] for element in self._matched_elements: @@ -444,17 +461,17 @@ def to_array(self) -> np.ndarray: array_values.append(element.value) else: array_values.append(element) - + # Try to stack into proper shape try: # Check if all arrays have the same size (regular) sizes = [] for val in array_values: - if hasattr(val, '__len__'): + if hasattr(val, "__len__"): sizes.append(len(val)) else: sizes.append(1) - + # If all sizes are the same, we can create a regular array if len(set(sizes)) == 1: # Regular array - all sub-arrays same size @@ -478,7 +495,7 @@ def to_array(self) -> np.ndarray: for i, val in enumerate(array_values): result_arr[i] = val return result_arr - except (ValueError, TypeError) as e: + except (ValueError, TypeError): # Fallback: return object array result_arr = np.empty(self._virtual_shape[0], dtype=object) for i, val in enumerate(array_values): @@ -488,11 +505,29 @@ def to_array(self) -> np.ndarray: # For non-numeric elements in multi-dimensional structure # Extract and try to build structure flat_values = [] - for element in self._matched_elements: - if isinstance(element, IDSPrimitive): - flat_values.append(element.value) - else: - flat_values.append(element) + + # First check if matched_elements are IDSStructArray (which need flattening) + from imas.ids_struct_array import IDSStructArray + + has_struct_arrays = self._matched_elements and isinstance( + self._matched_elements[0], IDSStructArray + ) + + if has_struct_arrays: + # Flatten IDSStructArray elements + for struct_array in self._matched_elements: + for element in struct_array: + if isinstance(element, IDSPrimitive): + flat_values.append(element.value) + else: + flat_values.append(element) + else: + # Regular elements + for element in self._matched_elements: + if isinstance(element, IDSPrimitive): + flat_values.append(element.value) + else: + flat_values.append(element) total_size = 1 for dim in self._virtual_shape: diff --git a/imas/test/test_multidim_slicing.py b/imas/test/test_multidim_slicing.py index b0c908b..fbb0475 100644 --- a/imas/test/test_multidim_slicing.py +++ b/imas/test/test_multidim_slicing.py @@ -6,7 +6,6 @@ import pytest from imas.ids_factory import IDSFactory -from imas.ids_slice import IDSSlice class TestMultiDimSlicing: @@ -137,7 +136,7 @@ def test_slice_preserves_groups(self): # Get all ions, then slice result = cp.profiles_1d[:].ion[:] - + # Should still know the structure: 10 profiles, 3 ions each assert result.shape == (10, 3) assert len(result) == 30 # Flattened for iteration, but shape preserved @@ -153,7 +152,7 @@ def test_integer_index_on_nested(self): # Get first ion from all profiles result = cp.profiles_1d[:].ion[0] - + assert len(result) == 5 for i, ion in enumerate(result): assert ion.label == f"ion_{i}_0" @@ -167,7 +166,7 @@ def test_slice_on_nested_arrays(self): # Get first 2 ions from each profile result = cp.profiles_1d[:].ion[:2] - + assert result.shape == (5, 2) assert len(result) == 10 # 5 profiles * 2 ions each @@ -180,7 +179,7 @@ def test_step_slicing_on_nested(self): # Get every other ion result = cp.profiles_1d[:].ion[::2] - + assert result.shape == (5, 3) # 5 profiles, 3 ions each (0, 2, 4) assert len(result) == 15 @@ -195,7 +194,7 @@ def test_negative_indexing_on_nested(self): # Get last ion from each profile result = cp.profiles_1d[:].ion[-1] - + assert len(result) == 5 for ion in result: assert ion.label == "ion_2" @@ -227,7 +226,7 @@ def test_boolean_indexing_simple(self): p.electrons.density = np.array([float(i)] * 5) result = cp.profiles_1d[:].electrons.density - + mask = np.array([True, False, True, False, True]) filtered = result[mask] assert len(filtered) == 3 @@ -239,15 +238,10 @@ def test_assignment_on_slice(self): for p in cp.profiles_1d: p.grid.rho_tor_norm = np.array([0.0, 0.5, 1.0]) - # Assign new values through slice - new_values = np.array([[0.1, 0.6, 1.1], - [0.2, 0.7, 1.2], - [0.3, 0.8, 1.3]]) - # This requires assignment support # cp.profiles_1d[:].grid.rho_tor_norm[:] = new_values # For now, verify slicing works for reading - + result = cp.profiles_1d[:].grid.rho_tor_norm array = result.to_array() assert array.shape == (3, 3) @@ -257,7 +251,7 @@ def test_xarray_integration_compatible(self): cp = IDSFactory("3.39.0").core_profiles() cp.profiles_1d.resize(3) cp.time = np.array([1.0, 2.0, 3.0]) - + for i, p in enumerate(cp.profiles_1d): p.grid.rho_tor_norm = np.array([0.0, 0.5, 1.0]) p.electrons.temperature = np.array([1.0, 2.0, 3.0]) * (i + 1) @@ -274,7 +268,7 @@ def test_performance_large_hierarchy(self): cp = IDSFactory("3.39.0").core_profiles() n_profiles = 50 cp.profiles_1d.resize(n_profiles) - + for p in cp.profiles_1d: p.grid.rho_tor_norm = np.linspace(0, 1, 100) p.ion.resize(5) @@ -284,7 +278,7 @@ def test_performance_large_hierarchy(self): # Should handle large data without significant slowdown result = cp.profiles_1d[:].grid.rho_tor_norm array = result.to_array() - + assert array.shape == (n_profiles, 100) def test_lazy_loading_with_multidim(self): @@ -296,12 +290,10 @@ def test_lazy_loading_with_multidim(self): p.grid.rho_tor_norm = np.array([0.0, 0.5, 1.0]) result = cp.profiles_1d[:].grid.rho_tor_norm - - # Verify lazy attributes are preserved - assert hasattr(result, '_lazy') - assert hasattr(result, '_parent_array') - + # Verify lazy attributes are preserved + assert hasattr(result, "_lazy") + assert hasattr(result, "_parent_array") class TestEdgeCases: @@ -339,7 +331,7 @@ def test_single_dimension_value(self): i.z_ion = 1.0 result = cp.profiles_1d[:].ion[0].z_ion - + # Should be 3 items (one per profile) assert len(result) == 3 @@ -352,6 +344,6 @@ def test_slice_of_slice(self): result1 = cp.profiles_1d[::2].ion # Every other profile's ions assert result1.shape == (5, 3) - + result2 = result1[:2] # First 2 from each assert result2.shape == (5, 2) From b2143f8f203d1fd5607c82fa67b2ad849e82fbc7 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Fri, 28 Nov 2025 21:34:36 +0100 Subject: [PATCH 32/44] fixed documentation issue --- docs/source/array_slicing.rst | 43 +++++++++++++++++++++++++++++- docs/source/intro.rst | 35 ++++++++++++++++++++++++ imas/ids_slice.py | 23 +++++++--------- imas/test/test_multidim_slicing.py | 4 +-- 4 files changed, 89 insertions(+), 16 deletions(-) diff --git a/docs/source/array_slicing.rst b/docs/source/array_slicing.rst index 43e3294..23aa2cd 100644 --- a/docs/source/array_slicing.rst +++ b/docs/source/array_slicing.rst @@ -27,12 +27,50 @@ Basic Usage subset = cp.profiles_1d[1:5] # IDSSlice every_other = cp.profiles_1d[::2] # IDSSlice - # Access nested arrays (automatic array-wise indexing) + # Access nested arrays all_ions = cp.profiles_1d[:].ion[:] # IDSSlice of individual ions # Extract values labels = all_ions.label.values() +Multi-Dimensional Slicing +--------------------------- + +The ``IDSSlice`` class supports multi-dimensional shape tracking and array conversion. + +**Check shape of sliced data:** + +.. code-block:: python + + # Get shape information for multi-dimensional data + print(cp.profiles_1d[:].grid.shape) # (106,) + print(cp.profiles_1d[:].ion.shape) # (106, ~3) + print(cp.profiles_1d[1:3].ion[0].element.shape) # (2, ~3) + +**Extract values with shape preservation:** + +.. code-block:: python + + # Extract as list + grid_values = cp.profiles_1d[:].grid.values() + + # Extract as numpy array + grid_array = cp.profiles_1d[:].grid.to_array() + + # Extract as numpy array + ion_array = cp.profiles_1d[:].ion.to_array() + +**Nested structure access:** + +.. code-block:: python + + # Access through nested arrays + grid_data = cp.profiles_1d[1:3].grid.rho_tor.to_array() + + # Ion properties across multiple profiles + ion_labels = cp.profiles_1d[:].ion[:].label.to_array() + ion_charges = cp.profiles_1d[:].ion[:].z_ion.to_array() + Common Patterns --------------- @@ -55,6 +93,9 @@ Common Patterns .. code-block:: python times = cp.profiles_1d[:].time.values() + + # Or as numpy array + times_array = cp.profiles_1d[:].time.to_array() Important: Array-wise Indexing ------------------------------- diff --git a/docs/source/intro.rst b/docs/source/intro.rst index 3027a24..7de9f15 100644 --- a/docs/source/intro.rst +++ b/docs/source/intro.rst @@ -154,3 +154,38 @@ can use ``.get()`` to load IDS data from disk: >>> dbentry2 = imas.DBEntry("mypulsefile.nc","r") >>> core_profiles2 = dbentry2.get("core_profiles") >>> print(core_profiles2.ids_properties.comment.value) + + +.. _`Multi-Dimensional Slicing`: + +Multi-Dimensional Slicing +'''''''''''''''''''''''''' + +IMAS-Python supports advanced slicing of hierarchical data structures with automatic +shape tracking and array conversion to numpy. This enables intuitive access to +multi-dimensional scientific data: + +.. code-block:: python + + >>> # Load data + >>> entry = imas.DBEntry("mypulsefile.nc","r") + >>> cp = entry.get("core_profiles", autoconvert=False, lazy=True) + + >>> # Check shape of sliced data + >>> cp.profiles_1d[:].grid.shape + (106,) + >>> cp.profiles_1d[:].ion.shape + (106, ~3) # ~3 ions per profile + + >>> # Extract values + >>> grid_values = cp.profiles_1d[:].grid.to_array() + >>> ion_labels = cp.profiles_1d[:].ion[:].label.to_array() + + >>> # Work with subsets + >>> subset_grid = cp.profiles_1d[1:3].grid.to_array() + >>> subset_ions = cp.profiles_1d[1:3].ion.to_array() + +The ``IDSSlice`` class tracks multi-dimensional shapes and provides both +``.values()`` and ``.to_array()`` (numpy array) +methods for data extraction. For more details, see :ref:`Array Slicing`. + diff --git a/imas/ids_slice.py b/imas/ids_slice.py index 9e51ac3..30bc22a 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -82,7 +82,7 @@ def shape(self) -> Tuple[int, ...]: array, based on the hierarchy of slicing operations performed. Returns: - Tuple of dimensions. Use with caution for jagged arrays where sizes vary. + Tuple of dimensions. """ return self._virtual_shape @@ -257,7 +257,6 @@ def __getattr__(self, name: str) -> "IDSSlice": child_sizes = [len(arr) for arr in child_elements] # New virtual shape: current shape + new dimension - # Jagged arrays handled by to_array() with object dtype new_virtual_shape = self._virtual_shape + ( child_sizes[0] if child_sizes else 0, ) @@ -327,10 +326,12 @@ def values(self, reshape: bool = False) -> Any: or list of extracted values. Returns: - For 1D: List of raw Python/numpy values or unwrapped elements - For multi-D with reshape=False: List of elements (each being an array) - For multi-D with reshape=True: numpy.ndarray with shape self.shape, - or nested lists/object array representing structure + list or numpy.ndarray: Extracted values as follows: + + - 1D slices: List of raw Python/numpy values or unwrapped elements + - Multi-D with reshape=False: List of elements (each being an array) + - Multi-D with reshape=True: numpy.ndarray with shape self.shape, + or nested lists/object array representing structure Examples: >>> # Get names from identifiers without looping @@ -404,17 +405,15 @@ def values(self, reshape: bool = False) -> Any: def to_array(self) -> np.ndarray: """Convert this slice to a numpy array respecting multi-dimensional structure. - For 1D slices, returns a simple 1D array. - For multi-dimensional slices, returns an array with shape self.shape. - For jagged arrays (varying sizes in lower dimensions), returns an object array. + For 1D slices: returns a simple 1D array. + For multi-dimensional slices: returns an array with shape self.shape. This is useful for integration with numpy operations, scipy functions, and xarray data structures. The returned array preserves the hierarchical structure of the IMAS data. Returns: - numpy.ndarray with shape self.shape. For jagged arrays, - dtype will be object. + numpy.ndarray with shape self.shape. Raises: ValueError: If array cannot be converted to numpy @@ -426,7 +425,6 @@ def to_array(self) -> np.ndarray: >>> print(rho_array.shape) (106, 100) >>> - >>> # Jagged array returns object array >>> ion_density = core_profiles.profiles_1d[:].ion[:].density.to_array() >>> # Result: object array shape (106, 3) with varying sizes >>> @@ -490,7 +488,6 @@ def to_array(self) -> np.ndarray: result_arr.flat[i] = val return result_arr else: - # Jagged array - different sizes result_arr = np.empty(self._virtual_shape[0], dtype=object) for i, val in enumerate(array_values): result_arr[i] = val diff --git a/imas/test/test_multidim_slicing.py b/imas/test/test_multidim_slicing.py index fbb0475..f5fbdae 100644 --- a/imas/test/test_multidim_slicing.py +++ b/imas/test/test_multidim_slicing.py @@ -76,8 +76,8 @@ def test_to_array_3d_regular(self): assert np.allclose(array[0, 0, :], [0.0, 1.0]) assert np.allclose(array[0, 1, :], [0.0, 1.0]) - def test_to_array_jagged(self): - """Test to_array() with jagged (variable-size) arrays.""" + def test_to_array_variable_size(self): + """Test to_array() with variable-size arrays.""" cp = IDSFactory("3.39.0").core_profiles() cp.profiles_1d.resize(3) cp.profiles_1d[0].grid.rho_tor_norm = np.array([0.0, 0.5, 1.0]) From 0aa5a2c9feedcb2e322c56bda88af62dda66920f Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Sun, 30 Nov 2025 00:06:45 +0100 Subject: [PATCH 33/44] fixed issue in sphinx doc generation --- docs/source/array_slicing.rst | 4 +++- docs/source/intro.rst | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/source/array_slicing.rst b/docs/source/array_slicing.rst index 23aa2cd..1daf873 100644 --- a/docs/source/array_slicing.rst +++ b/docs/source/array_slicing.rst @@ -1,5 +1,7 @@ +.. _array-slicing: + Array Slicing -============== +============= The ``IDSStructArray`` class supports Python's standard slicing syntax. diff --git a/docs/source/intro.rst b/docs/source/intro.rst index 7de9f15..125b407 100644 --- a/docs/source/intro.rst +++ b/docs/source/intro.rst @@ -187,5 +187,5 @@ multi-dimensional scientific data: The ``IDSSlice`` class tracks multi-dimensional shapes and provides both ``.values()`` and ``.to_array()`` (numpy array) -methods for data extraction. For more details, see :ref:`Array Slicing`. +methods for data extraction. For more details, see :ref:`array-slicing`. From 56f154ec03953d3139bfb463583b41097a4ba2bf Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Mon, 5 Jan 2026 09:44:57 +0100 Subject: [PATCH 34/44] raise value error when ragged arrays found https://github.com/iterorganization/IMAS-Python/pull/20#discussion_r2580171062 --- imas/ids_slice.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/imas/ids_slice.py b/imas/ids_slice.py index 30bc22a..11a8984 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -74,6 +74,24 @@ def _path(self) -> str: """Return the path representation of this slice.""" return self._slice_path + @property + def is_ragged(self) -> bool: + """Check if the underlying data is ragged (non-rectangular). + + Ragged arrays have varying sizes at one or more dimensions. + + Returns: + True if any dimension has varying sizes, False otherwise + + """ + # Check if any level in the hierarchy has non-uniform sizes + for sizes_list in self._element_hierarchy: + # sizes_list can be a list of sizes or a single integer + if isinstance(sizes_list, list) and len(sizes_list) > 1: + if len(set(sizes_list)) > 1: + return True + return False + @property def shape(self) -> Tuple[int, ...]: """Get the virtual multi-dimensional shape. @@ -81,9 +99,20 @@ def shape(self) -> Tuple[int, ...]: Returns the shape of the data as if it were organized in a multi-dimensional array, based on the hierarchy of slicing operations performed. + Raises: + ValueError: The underlying data is ragged (non-rectangular). Use + .is_ragged to check first, or use + .to_array() to convert to a numpy object array. + Returns: Tuple of dimensions. """ + if self.is_ragged: + raise ValueError( + f"Cannot get shape of ragged array: dimensions have varying sizes. " + f"Use .is_ragged to check if data is ragged, or .to_array() to " + f"convert to numpy object array." + ) return self._virtual_shape def __len__(self) -> int: From b92883c238ac611741c37869bc5e8a3bd2dd5bd6 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Mon, 5 Jan 2026 10:25:41 +0100 Subject: [PATCH 35/44] Removed Union and simplified __getitem__ method https://github.com/iterorganization/IMAS-Python/pull/20#discussion_r2580175890 --- imas/ids_slice.py | 174 ++++++++++++++++++++++++++++------------------ 1 file changed, 105 insertions(+), 69 deletions(-) diff --git a/imas/ids_slice.py b/imas/ids_slice.py index 11a8984..15b871c 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -123,7 +123,7 @@ def __iter__(self) -> Iterator[Any]: """Iterate over all matched elements.""" return iter(self._matched_elements) - def __getitem__(self, item: Union[int, slice]) -> Union[Any, "IDSSlice"]: + def __getitem__(self, item: Union[int, slice]) -> "IDSSlice": """Get element(s) from the slice. When the matched elements are IDSStructArray objects, the indexing @@ -140,84 +140,120 @@ def __getitem__(self, item: Union[int, slice]) -> Union[Any, "IDSSlice"]: """ from imas.ids_struct_array import IDSStructArray - # Array-wise indexing: apply operation to each IDSStructArray element + # Check if we have array-wise indexing (elements are IDSStructArray) if self._matched_elements and isinstance( self._matched_elements[0], IDSStructArray ): if isinstance(item, slice): - # Preserve structure instead of flattening - sliced_elements = [] - sliced_sizes = [] - - for array in self._matched_elements: - sliced = array[item] - if isinstance(sliced, IDSSlice): - sliced_elements.extend(sliced._matched_elements) - sliced_sizes.append(len(sliced)) - else: - sliced_elements.append(sliced) - sliced_sizes.append(1) - - slice_str = self._format_slice(item) - new_path = self._slice_path + slice_str - - # Update shape to reflect the sliced structure - # Keep first dimensions, update last dimension - new_virtual_shape = self._virtual_shape[:-1] + ( - sliced_sizes[0] if sliced_sizes else 0, - ) - new_hierarchy = self._element_hierarchy[:-1] + [sliced_sizes] - - return IDSSlice( - self.metadata, - sliced_elements, - new_path, - parent_array=self._parent_array, - virtual_shape=new_virtual_shape, - element_hierarchy=new_hierarchy, - ) + return self._handle_array_wise_slice(item) else: - # Integer indexing on arrays - indexed_elements = [] - for array in self._matched_elements: - indexed_elements.append(array[int(item)]) - - new_path = self._slice_path + f"[{item}]" - - # Shape changes: last dimension becomes 1 - new_virtual_shape = self._virtual_shape[:-1] + (1,) - - return IDSSlice( - self.metadata, - indexed_elements, - new_path, - parent_array=self._parent_array, - virtual_shape=new_virtual_shape, - element_hierarchy=self._element_hierarchy, - ) + return self._handle_array_wise_integer(item) else: if isinstance(item, slice): - sliced_elements = self._matched_elements[item] - slice_str = self._format_slice(item) - new_path = self._slice_path + slice_str - - # Update shape to reflect the slice on first dimension - new_virtual_shape = (len(sliced_elements),) + self._virtual_shape[1:] - new_element_hierarchy = [ - len(sliced_elements) - ] + self._element_hierarchy[1:] - - return IDSSlice( - self.metadata, - sliced_elements, - new_path, - parent_array=self._parent_array, - virtual_shape=new_virtual_shape, - element_hierarchy=new_element_hierarchy, - ) + return self._handle_list_slice(item) else: return self._matched_elements[int(item)] + def _handle_array_wise_slice(self, item: slice) -> "IDSSlice": + """Apply a slice operation array-wise to IDSStructArray elements. + + Applies the slice to each array element and preserves the grouping + structure for multi-dimensional shapes. + + Args: + item: The slice object to apply + + Returns: + IDSSlice with updated shape and hierarchy + """ + sliced_elements = [] + sliced_sizes = [] + + for array in self._matched_elements: + sliced = array[item] + if isinstance(sliced, IDSSlice): + sliced_elements.extend(sliced._matched_elements) + sliced_sizes.append(len(sliced)) + else: + sliced_elements.append(sliced) + sliced_sizes.append(1) + + slice_str = self._format_slice(item) + new_path = self._slice_path + slice_str + + # Update shape to reflect the sliced structure + # Keep first dimensions, update last dimension + new_virtual_shape = self._virtual_shape[:-1] + ( + sliced_sizes[0] if sliced_sizes else 0, + ) + new_hierarchy = self._element_hierarchy[:-1] + [sliced_sizes] + + return IDSSlice( + self.metadata, + sliced_elements, + new_path, + parent_array=self._parent_array, + virtual_shape=new_virtual_shape, + element_hierarchy=new_hierarchy, + ) + + def _handle_array_wise_integer(self, item: int) -> "IDSSlice": + """Apply integer indexing array-wise to IDSStructArray elements. + + Applies the integer index to each array element, reducing the last + dimension to size 1. + + Args: + item: The integer index to apply + + Returns: + IDSSlice with updated shape + """ + indexed_elements = [array[int(item)] for array in self._matched_elements] + + new_path = self._slice_path + f"[{item}]" + + # Shape changes: last dimension becomes 1 + new_virtual_shape = self._virtual_shape[:-1] + (1,) + + return IDSSlice( + self.metadata, + indexed_elements, + new_path, + parent_array=self._parent_array, + virtual_shape=new_virtual_shape, + element_hierarchy=self._element_hierarchy, + ) + + def _handle_list_slice(self, item: slice) -> "IDSSlice": + """Apply a slice operation to the matched elements list. + + Updates the first dimension of the shape to reflect the new + number of elements after slicing. + + Args: + item: The slice object to apply + + Returns: + IDSSlice with updated shape and hierarchy + """ + sliced_elements = self._matched_elements[item] + slice_str = self._format_slice(item) + new_path = self._slice_path + slice_str + + # Update shape to reflect the slice on first dimension + new_virtual_shape = (len(sliced_elements),) + self._virtual_shape[1:] + new_element_hierarchy = [len(sliced_elements)] + self._element_hierarchy[1:] + + return IDSSlice( + self.metadata, + sliced_elements, + new_path, + parent_array=self._parent_array, + virtual_shape=new_virtual_shape, + element_hierarchy=new_element_hierarchy, + ) + def __getattr__(self, name: str) -> "IDSSlice": """Access a child node on all matched elements. From 0a4658d5b732b5c01f1002e3cfcafa1c0800a9f5 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Mon, 5 Jan 2026 11:34:25 +0100 Subject: [PATCH 36/44] Check if node is valid and return empty slice if array is empty https://github.com/iterorganization/IMAS-Python/pull/20#discussion_r2580213419 --- imas/ids_slice.py | 49 ++++++++++++++++++++++-------- imas/test/test_multidim_slicing.py | 5 +-- 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/imas/ids_slice.py b/imas/ids_slice.py index 15b871c..d699a3c 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -257,36 +257,61 @@ def _handle_list_slice(self, item: slice) -> "IDSSlice": def __getattr__(self, name: str) -> "IDSSlice": """Access a child node on all matched elements. - This returns a new IDSSlice containing the child node from - each matched element. Preserves multi-dimensional structure - when child elements are arrays. + Returns a new IDSSlice containing the child node from each matched + element. Validates the attribute name against metadata, allowing + empty slices with valid child node names. Args: name: Name of the node to access Returns: - A new IDSSlice containing the child node from each matched element - """ - if not self._matched_elements: - raise IndexError( - f"Cannot access node '{name}' on empty slice with 0 elements" - ) + A new IDSSlice containing the child node from each matched element, + or an empty IDSSlice if the matched_elements is empty but the + attribute name is valid according to metadata. + Raises: + AttributeError: If name is not a valid child node in the metadata + """ from imas.ids_struct_array import IDSStructArray + from imas.ids_primitive import IDSNumericArray + # Validate attribute name via metadata first child_metadata = None if self.metadata is not None: try: child_metadata = self.metadata[name] except (KeyError, TypeError): - pass + raise AttributeError( + f"'{self.metadata.name}' has no child node '{name}'" + ) from None + else: + # No metadata available for validation + # Try to get the attribute anyway, will fail if invalid + if not self._matched_elements: + raise AttributeError( + f"Cannot validate attribute '{name}' on empty slice " + f"without metadata" + ) from None + + # Handle empty slice - valid if metadata says it's a valid node + if not self._matched_elements: + new_path = self._slice_path + "." + name + return IDSSlice( + child_metadata, + [], + new_path, + parent_array=self._parent_array, + virtual_shape=(0,), + element_hierarchy=[0], + ) + # Get attributes from all non-empty matched elements child_elements = [getattr(element, name) for element in self] new_path = self._slice_path + "." + name # Check if children are IDSStructArray (nested arrays) or IDSNumericArray if not child_elements: - # Empty slice + # Empty child elements return IDSSlice( child_metadata, child_elements, @@ -296,8 +321,6 @@ def __getattr__(self, name: str) -> "IDSSlice": element_hierarchy=self._element_hierarchy, ) - from imas.ids_primitive import IDSNumericArray - if isinstance(child_elements[0], IDSStructArray): # Children are IDSStructArray - track the new dimension child_sizes = [len(arr) for arr in child_elements] diff --git a/imas/test/test_multidim_slicing.py b/imas/test/test_multidim_slicing.py index f5fbdae..43925c2 100644 --- a/imas/test/test_multidim_slicing.py +++ b/imas/test/test_multidim_slicing.py @@ -291,9 +291,10 @@ def test_lazy_loading_with_multidim(self): result = cp.profiles_1d[:].grid.rho_tor_norm - # Verify lazy attributes are preserved - assert hasattr(result, "_lazy") + # Verify IDSSlice attributes are preserved assert hasattr(result, "_parent_array") + assert hasattr(result, "_matched_elements") + assert len(result._matched_elements) > 0 class TestEdgeCases: From d91d25b4ce02a734bf6f7dc581c39ae77f772a0e Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Mon, 5 Jan 2026 11:40:00 +0100 Subject: [PATCH 37/44] IDSSlice should have valid metadata, https://github.com/iterorganization/IMAS-Python/pull/20#discussion_r2580217742 --- imas/ids_slice.py | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/imas/ids_slice.py b/imas/ids_slice.py index d699a3c..3669f5b 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -31,7 +31,7 @@ class IDSSlice: - Iteration over matched elements Attributes: - metadata: Metadata from the parent array, or None if not available + metadata: Metadata from the parent array (always present) """ __slots__ = [ @@ -45,7 +45,7 @@ class IDSSlice: def __init__( self, - metadata: Optional[IDSMetadata], + metadata: IDSMetadata, matched_elements: List[Any], slice_path: str, parent_array: Optional["IDSStructArray"] = None, @@ -55,7 +55,7 @@ def __init__( """Initialize IDSSlice. Args: - metadata: Metadata from the parent array + metadata: Metadata from the parent array (required) matched_elements: List of elements that matched the slice slice_path: String representation of the slice operation (e.g., "[8:]") parent_array: Optional reference to the parent IDSStructArray for context @@ -275,23 +275,13 @@ def __getattr__(self, name: str) -> "IDSSlice": from imas.ids_struct_array import IDSStructArray from imas.ids_primitive import IDSNumericArray - # Validate attribute name via metadata first - child_metadata = None - if self.metadata is not None: - try: - child_metadata = self.metadata[name] - except (KeyError, TypeError): - raise AttributeError( - f"'{self.metadata.name}' has no child node '{name}'" - ) from None - else: - # No metadata available for validation - # Try to get the attribute anyway, will fail if invalid - if not self._matched_elements: - raise AttributeError( - f"Cannot validate attribute '{name}' on empty slice " - f"without metadata" - ) from None + # Validate attribute name via metadata + try: + child_metadata = self.metadata[name] + except (KeyError, TypeError): + raise AttributeError( + f"'{self.metadata.name}' has no child node '{name}'" + ) from None # Handle empty slice - valid if metadata says it's a valid node if not self._matched_elements: From 74e0d73071569b5fd59c9009aec38352aa2dec89 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Mon, 5 Jan 2026 11:50:06 +0100 Subject: [PATCH 38/44] the size of all items is not the same as the first, there may be ragged arrays. https://github.com/iterorganization/IMAS-Python/pull/20#discussion_r2580265468 --- imas/ids_slice.py | 135 +++++++++++++++++++++++++--------------------- 1 file changed, 75 insertions(+), 60 deletions(-) diff --git a/imas/ids_slice.py b/imas/ids_slice.py index 3669f5b..ba8c7e1 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -113,7 +113,18 @@ def shape(self) -> Tuple[int, ...]: f"Use .is_ragged to check if data is ragged, or .to_array() to " f"convert to numpy object array." ) - return self._virtual_shape + + # Build shape from hierarchy, replacing None with actual uniform size + shape = [] + for hierarchy_level in self._element_hierarchy: + if isinstance(hierarchy_level, list): + # This is a list of sizes - get the uniform size (we checked is_ragged) + shape.append(hierarchy_level[0]) + else: + # This is a single count + shape.append(hierarchy_level) + + return tuple(shape) def __len__(self) -> int: """Return the number of elements matched by this slice.""" @@ -182,10 +193,8 @@ def _handle_array_wise_slice(self, item: slice) -> "IDSSlice": new_path = self._slice_path + slice_str # Update shape to reflect the sliced structure - # Keep first dimensions, update last dimension - new_virtual_shape = self._virtual_shape[:-1] + ( - sliced_sizes[0] if sliced_sizes else 0, - ) + # Keep first dimensions, store actual sizes (may be ragged) + new_virtual_shape = self._virtual_shape[:-1] + (None,) new_hierarchy = self._element_hierarchy[:-1] + [sliced_sizes] return IDSSlice( @@ -316,9 +325,8 @@ def __getattr__(self, name: str) -> "IDSSlice": child_sizes = [len(arr) for arr in child_elements] # New virtual shape: current shape + new dimension - new_virtual_shape = self._virtual_shape + ( - child_sizes[0] if child_sizes else 0, - ) + # Store actual sizes (may be ragged) - don't assume all are the same! + new_virtual_shape = self._virtual_shape + (None,) new_hierarchy = self._element_hierarchy + [child_sizes] return IDSSlice( @@ -335,9 +343,8 @@ def __getattr__(self, name: str) -> "IDSSlice": child_sizes = [len(arr) for arr in child_elements] # New virtual shape: current shape + new dimension - new_virtual_shape = self._virtual_shape + ( - child_sizes[0] if child_sizes else 0, - ) + # Store actual sizes (may be ragged) - don't assume all are the same! + new_virtual_shape = self._virtual_shape + (None,) new_hierarchy = self._element_hierarchy + [child_sizes] return IDSSlice( @@ -410,27 +417,6 @@ def values(self, reshape: bool = False) -> Any: - Multi-D with reshape=False: List of elements (each being an array) - Multi-D with reshape=True: numpy.ndarray with shape self.shape, or nested lists/object array representing structure - - Examples: - >>> # Get names from identifiers without looping - >>> n = edge_profiles.grid_ggd[0].grid_subset[:].identifier.name.values() - >>> # Result: ["nodes", "edges", "cells"] - >>> - >>> # Get 2D array but as list of arrays (default) - >>> rho = core_profiles.profiles_1d[:].grid.rho_tor.values() - >>> # Result: [ndarray(100,), ndarray(100,), ...] - list of 106 arrays - >>> - >>> # Get 2D array reshaped to (106, 100) - >>> rho = core_profiles.profiles_1d[:].grid.rho_tor.values(reshape=True) - >>> # Result: ndarray shape (106, 100) - >>> - >>> # 3D ions case - returns object array with structure - >>> ion_rho = ( - ... core_profiles.profiles_1d[:].ion[:].element[:].density.values( - ... reshape=True - ... ) - ... ) - >>> # Result: object array shape (106, 3, 2) with IDSNumericArray elements """ from imas.ids_primitive import IDSPrimitive, IDSNumericArray @@ -445,6 +431,19 @@ def values(self, reshape: bool = False) -> Any: return result # Multi-dimensional case with reshape requested + # Get the actual shape (handles None values in _virtual_shape) + try: + actual_shape = self.shape # Will raise if ragged + except ValueError: + # If ragged, just return flat list + result = [] + for element in self._matched_elements: + if isinstance(element, IDSPrimitive): + result.append(element.value) + else: + result.append(element) + return result + flat_values = [] for element in self._matched_elements: if isinstance(element, IDSPrimitive): @@ -457,26 +456,26 @@ def values(self, reshape: bool = False) -> Any: flat_values.append(element) # For 1D, just return as is - if len(self._virtual_shape) == 1: + if len(actual_shape) == 1: return flat_values # Try to reshape to multi-dimensional shape try: # Calculate total size total_size = 1 - for dim in self._virtual_shape: + for dim in actual_shape: total_size *= dim # Check if sizes match if len(flat_values) == total_size: # Successfully reshape to multi-dimensional - return np.array(flat_values, dtype=object).reshape(self._virtual_shape) + return np.array(flat_values, dtype=object).reshape(actual_shape) except (ValueError, TypeError): pass # If reshape fails or not all elements are extractable, return as object array try: - return np.array(flat_values, dtype=object).reshape(self._virtual_shape[0:1]) + return np.array(flat_values, dtype=object).reshape(actual_shape[0:1]) except (ValueError, TypeError): return flat_values @@ -485,35 +484,31 @@ def to_array(self) -> np.ndarray: For 1D slices: returns a simple 1D array. For multi-dimensional slices: returns an array with shape self.shape. + For ragged data: returns an object array containing the elements as-is. This is useful for integration with numpy operations, scipy functions, and xarray data structures. The returned array preserves the hierarchical structure of the IMAS data. Returns: - numpy.ndarray with shape self.shape. + numpy.ndarray with shape self.shape, or object array if ragged. Raises: ValueError: If array cannot be converted to numpy - - Examples: - >>> # Convert 2D slice to numpy array - >>> rho_array = core_profiles.profiles_1d[:].grid.rho_tor.to_array() - >>> # Result: ndarray shape (106, 100), dtype float64 - >>> print(rho_array.shape) - (106, 100) - >>> - >>> ion_density = core_profiles.profiles_1d[:].ion[:].density.to_array() - >>> # Result: object array shape (106, 3) with varying sizes - >>> - >>> # Can be used directly with numpy functions - >>> mean_rho = np.mean(rho_array, axis=1) - >>> # Result: (106,) array of mean values """ from imas.ids_primitive import IDSPrimitive, IDSNumericArray + # Try to get the actual shape (will check if ragged) + try: + actual_shape = self.shape # Will raise if ragged + is_ragged_data = False + except ValueError: + # Data is ragged - handle it gracefully + is_ragged_data = True + actual_shape = None + # 1D case - simple conversion - if len(self._virtual_shape) == 1: + if not is_ragged_data and len(actual_shape) == 1: flat_values = [] for element in self._matched_elements: if isinstance(element, IDSPrimitive): @@ -538,6 +533,10 @@ def to_array(self) -> np.ndarray: else: array_values.append(element) + # For ragged data, return object array with arrays as elements + if is_ragged_data: + return np.array(array_values, dtype=object) + # Try to stack into proper shape try: # Check if all arrays have the same size (regular) @@ -553,26 +552,26 @@ def to_array(self) -> np.ndarray: # Regular array - all sub-arrays same size stacked = np.array(array_values) # Should now have shape (first_dim, second_dim) - if stacked.shape == self._virtual_shape: + if stacked.shape == actual_shape: return stacked else: # Try explicit reshape try: - return stacked.reshape(self._virtual_shape) + return stacked.reshape(actual_shape) except ValueError: # If reshape fails, return as object array - result_arr = np.empty(self._virtual_shape, dtype=object) + result_arr = np.empty(actual_shape, dtype=object) for i, val in enumerate(array_values): result_arr.flat[i] = val return result_arr else: - result_arr = np.empty(self._virtual_shape[0], dtype=object) + result_arr = np.empty(actual_shape[0], dtype=object) for i, val in enumerate(array_values): result_arr[i] = val return result_arr except (ValueError, TypeError): # Fallback: return object array - result_arr = np.empty(self._virtual_shape[0], dtype=object) + result_arr = np.empty(actual_shape[0], dtype=object) for i, val in enumerate(array_values): result_arr[i] = val return result_arr @@ -604,8 +603,24 @@ def to_array(self) -> np.ndarray: else: flat_values.append(element) + # For ragged data, construct object array from hierarchy + if is_ragged_data: + # Build object array respecting the ragged structure + if len(self._element_hierarchy) == 1: + # Simple 1D array + return np.array(flat_values, dtype=object) + else: + # Multi-level hierarchy - reconstruct structure + result_arr = np.empty(self._element_hierarchy[0], dtype=object) + idx = 0 + for i in range(self._element_hierarchy[0]): + group_size = self._element_hierarchy[1][i] + result_arr[i] = flat_values[idx:idx+group_size] + idx += group_size + return result_arr + total_size = 1 - for dim in self._virtual_shape: + for dim in actual_shape: total_size *= dim # Check if we have the right number of elements @@ -620,10 +635,10 @@ def to_array(self) -> np.ndarray: arr = np.array(flat_values) try: # Try to reshape to target shape - return arr.reshape(self._virtual_shape) + return arr.reshape(actual_shape) except (ValueError, TypeError): # If reshape fails, use object array - arr_obj = np.empty(self._virtual_shape, dtype=object) + arr_obj = np.empty(actual_shape, dtype=object) for i, val in enumerate(flat_values): arr_obj.flat[i] = val return arr_obj From 16a608a50cdb8492dbe4f452c69d5545625dc5d5 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Mon, 5 Jan 2026 13:23:11 +0100 Subject: [PATCH 39/44] added property to get ids name in IDSMetadata class, build IDSSlice with full path rather than slice path https://github.com/iterorganization/IMAS-Python/pull/20#discussion_r2580441570 --- imas/ids_metadata.py | 14 ++++++++++ imas/ids_slice.py | 56 ++++++++++++++++++---------------------- imas/ids_struct_array.py | 10 +++++-- 3 files changed, 47 insertions(+), 33 deletions(-) diff --git a/imas/ids_metadata.py b/imas/ids_metadata.py index 4d2d5db..cb45886 100644 --- a/imas/ids_metadata.py +++ b/imas/ids_metadata.py @@ -287,6 +287,20 @@ def __getitem__(self, path) -> "IDSMetadata": ) from None return item + @property + def ids_name(self) -> str: + """Get the root IDS name (e.g., 'core_profiles', 'equilibrium'). + + Traverses up the metadata hierarchy to find the toplevel IDS name. + + Returns: + The name of the root IDS node. + """ + current = self + while current._parent is not None: + current = current._parent + return current.name + @property def identifier_enum(self) -> Optional[Type[IDSIdentifier]]: """The identifier enum for this IDS node (if available). diff --git a/imas/ids_slice.py b/imas/ids_slice.py index ba8c7e1..1107a7a 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -47,7 +47,7 @@ def __init__( self, metadata: IDSMetadata, matched_elements: List[Any], - slice_path: str, + full_path: str, parent_array: Optional["IDSStructArray"] = None, virtual_shape: Optional[Tuple[int, ...]] = None, element_hierarchy: Optional[List[Any]] = None, @@ -57,14 +57,14 @@ def __init__( Args: metadata: Metadata from the parent array (required) matched_elements: List of elements that matched the slice - slice_path: String representation of the slice operation (e.g., "[8:]") + full_path: Full path from the IDS root (e.g., "profiles_1d[:].ion[:]") parent_array: Optional reference to the parent IDSStructArray for context virtual_shape: Optional tuple representing multi-dimensional shape element_hierarchy: Optional tracking of element grouping """ self.metadata = metadata self._matched_elements = matched_elements - self._slice_path = slice_path + self._slice_path = full_path self._parent_array = parent_array self._virtual_shape = virtual_shape or (len(matched_elements),) self._element_hierarchy = element_hierarchy or [len(matched_elements)] @@ -190,7 +190,8 @@ def _handle_array_wise_slice(self, item: slice) -> "IDSSlice": sliced_sizes.append(1) slice_str = self._format_slice(item) - new_path = self._slice_path + slice_str + # Full path: current path + slice operation + full_path = self._path + slice_str # Update shape to reflect the sliced structure # Keep first dimensions, store actual sizes (may be ragged) @@ -200,7 +201,7 @@ def _handle_array_wise_slice(self, item: slice) -> "IDSSlice": return IDSSlice( self.metadata, sliced_elements, - new_path, + full_path, parent_array=self._parent_array, virtual_shape=new_virtual_shape, element_hierarchy=new_hierarchy, @@ -220,7 +221,8 @@ def _handle_array_wise_integer(self, item: int) -> "IDSSlice": """ indexed_elements = [array[int(item)] for array in self._matched_elements] - new_path = self._slice_path + f"[{item}]" + # Full path: current path + index operation + full_path = self._path + f"[{item}]" # Shape changes: last dimension becomes 1 new_virtual_shape = self._virtual_shape[:-1] + (1,) @@ -228,7 +230,7 @@ def _handle_array_wise_integer(self, item: int) -> "IDSSlice": return IDSSlice( self.metadata, indexed_elements, - new_path, + full_path, parent_array=self._parent_array, virtual_shape=new_virtual_shape, element_hierarchy=self._element_hierarchy, @@ -248,7 +250,8 @@ def _handle_list_slice(self, item: slice) -> "IDSSlice": """ sliced_elements = self._matched_elements[item] slice_str = self._format_slice(item) - new_path = self._slice_path + slice_str + # Full path: current path + slice operation + full_path = self._path + slice_str # Update shape to reflect the slice on first dimension new_virtual_shape = (len(sliced_elements),) + self._virtual_shape[1:] @@ -257,7 +260,7 @@ def _handle_list_slice(self, item: slice) -> "IDSSlice": return IDSSlice( self.metadata, sliced_elements, - new_path, + full_path, parent_array=self._parent_array, virtual_shape=new_virtual_shape, element_hierarchy=new_element_hierarchy, @@ -292,13 +295,15 @@ def __getattr__(self, name: str) -> "IDSSlice": f"'{self.metadata.name}' has no child node '{name}'" ) from None + # Full path: current path + attribute access + full_path = self._path + "." + name + # Handle empty slice - valid if metadata says it's a valid node if not self._matched_elements: - new_path = self._slice_path + "." + name return IDSSlice( child_metadata, [], - new_path, + full_path, parent_array=self._parent_array, virtual_shape=(0,), element_hierarchy=[0], @@ -306,7 +311,6 @@ def __getattr__(self, name: str) -> "IDSSlice": # Get attributes from all non-empty matched elements child_elements = [getattr(element, name) for element in self] - new_path = self._slice_path + "." + name # Check if children are IDSStructArray (nested arrays) or IDSNumericArray if not child_elements: @@ -314,7 +318,7 @@ def __getattr__(self, name: str) -> "IDSSlice": return IDSSlice( child_metadata, child_elements, - new_path, + full_path, parent_array=self._parent_array, virtual_shape=self._virtual_shape, element_hierarchy=self._element_hierarchy, @@ -332,7 +336,7 @@ def __getattr__(self, name: str) -> "IDSSlice": return IDSSlice( child_metadata, child_elements, - new_path, + full_path, parent_array=self._parent_array, virtual_shape=new_virtual_shape, element_hierarchy=new_hierarchy, @@ -350,7 +354,7 @@ def __getattr__(self, name: str) -> "IDSSlice": return IDSSlice( child_metadata, child_elements, - new_path, + full_path, parent_array=self._parent_array, virtual_shape=new_virtual_shape, element_hierarchy=new_hierarchy, @@ -360,7 +364,7 @@ def __getattr__(self, name: str) -> "IDSSlice": return IDSSlice( child_metadata, child_elements, - new_path, + full_path, parent_array=self._parent_array, virtual_shape=self._virtual_shape, element_hierarchy=self._element_hierarchy, @@ -371,26 +375,16 @@ def __repr__(self) -> str: Returns a string showing: - The IDS type name (e.g., 'equilibrium') - - The full path including the slice operation (e.g., 'time_slice[:]') + - The full path including slice operations (e.g., 'profiles_1d[:].ion[:]') - The number of matched elements Returns: - String representation like below - like '' + String representation like: + '' """ - from imas.util import get_toplevel, get_full_path - - my_repr = f"<{type(self).__name__}" - ids_name = "unknown" - full_path = self._path - - if self._parent_array is not None: - ids_name = get_toplevel(self._parent_array).metadata.name - parent_array_path = get_full_path(self._parent_array) - full_path = parent_array_path + self._path + ids_name = self.metadata.ids_name item_word = "item" if len(self) == 1 else "items" - my_repr += f" (IDS:{ids_name}, {full_path} with {len(self)} {item_word})>" - return my_repr + return f"<{type(self).__name__} (IDS:{ids_name}, {self._path} with {len(self)} {item_word})>" def values(self, reshape: bool = False) -> Any: """Extract raw values from elements in this slice. diff --git a/imas/ids_struct_array.py b/imas/ids_struct_array.py index b06396b..d6b892c 100644 --- a/imas/ids_struct_array.py +++ b/imas/ids_struct_array.py @@ -144,25 +144,31 @@ def __getitem__(self, item): loaded_elements.append(self.value[i]) from imas.ids_slice import IDSSlice + from imas.util import get_full_path slice_str = IDSSlice._format_slice(item) + # Build full path: parent path + this array name + slice + full_path = get_full_path(self) + slice_str return IDSSlice( self.metadata, loaded_elements, - slice_str, + full_path, parent_array=self, ) from imas.ids_slice import IDSSlice + from imas.util import get_full_path matched_elements = self.value[item] slice_str = IDSSlice._format_slice(item) + # Build full path: parent path + this array name + slice + full_path = get_full_path(self) + slice_str return IDSSlice( self.metadata, matched_elements, - slice_str, + full_path, parent_array=self, ) else: From 7d8abaae0229a5733afc9a55c08376b0c51f9e69 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Mon, 5 Jan 2026 14:03:51 +0100 Subject: [PATCH 40/44] simplified values method according to suggestions from Maarten https://github.com/iterorganization/IMAS-Python/pull/20#discussion_r2580467340 --- imas/ids_slice.py | 103 ++++++++++++++-------------------------------- 1 file changed, 32 insertions(+), 71 deletions(-) diff --git a/imas/ids_slice.py b/imas/ids_slice.py index 1107a7a..4edf5ce 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -386,92 +386,53 @@ def __repr__(self) -> str: item_word = "item" if len(self) == 1 else "items" return f"<{type(self).__name__} (IDS:{ids_name}, {self._path} with {len(self)} {item_word})>" - def values(self, reshape: bool = False) -> Any: + def values(self) -> List[Any]: """Extract raw values from elements in this slice. For IDSPrimitive elements, this extracts the wrapped value. For other element types, returns them as-is. - For multi-dimensional slices (when shape has multiple dimensions), - this extracts values respecting the multi-dimensional structure. + Returns a flat list of extracted values. This is useful for getting + the actual data without the IDS wrapper when accessing scalar fields + through a slice, without requiring explicit looping through the + original collection. - This is useful for getting the actual data without the IDS wrapper - when accessing scalar fields through a slice, without requiring - explicit looping through the original collection. - - Args: - reshape: If True, reshape result to match self.shape for - multi-dimensional slices. If False (default), return flat list - or list of extracted values. + For multi-dimensional access to values: + - Use direct indexing: ``ids_obj[i1].collection[i2].value`` for best + performance and clarity + - Use ``.to_array()`` if you need numpy array integration Returns: - list or numpy.ndarray: Extracted values as follows: + List of raw Python/numpy values or unwrapped elements - - 1D slices: List of raw Python/numpy values or unwrapped elements - - Multi-D with reshape=False: List of elements (each being an array) - - Multi-D with reshape=True: numpy.ndarray with shape self.shape, - or nested lists/object array representing structure - """ - from imas.ids_primitive import IDSPrimitive, IDSNumericArray + Examples: + Extract scalar values from a 1D slice:: - # Default behavior: return flat list without reshape - if not reshape: - result = [] - for element in self._matched_elements: - if isinstance(element, IDSPrimitive): - result.append(element.value) - else: - result.append(element) - return result + # Get list of temperatures from all profiles + temps = core_profiles.profiles_1d[:].te.values() - # Multi-dimensional case with reshape requested - # Get the actual shape (handles None values in _virtual_shape) - try: - actual_shape = self.shape # Will raise if ragged - except ValueError: - # If ragged, just return flat list - result = [] - for element in self._matched_elements: - if isinstance(element, IDSPrimitive): - result.append(element.value) - else: - result.append(element) - return result + For multi-dimensional access, use direct indexing instead:: - flat_values = [] + # Get a specific temperature (more efficient than slicing) + temp = core_profiles.profiles_1d[0].te.values()[5] + + # Or better yet, direct access + temp_value = core_profiles.profiles_1d[0].te[5] + + For converting to numpy arrays:: + + # Use to_array() for tensorization + array = core_profiles.profiles_1d[:].te.to_array() + """ + from imas.ids_primitive import IDSPrimitive + + result = [] for element in self._matched_elements: if isinstance(element, IDSPrimitive): - flat_values.append(element.value) - elif isinstance(element, IDSNumericArray): - flat_values.append( - element.data if hasattr(element, "data") else element.value - ) + result.append(element.value) else: - flat_values.append(element) - - # For 1D, just return as is - if len(actual_shape) == 1: - return flat_values - - # Try to reshape to multi-dimensional shape - try: - # Calculate total size - total_size = 1 - for dim in actual_shape: - total_size *= dim - - # Check if sizes match - if len(flat_values) == total_size: - # Successfully reshape to multi-dimensional - return np.array(flat_values, dtype=object).reshape(actual_shape) - except (ValueError, TypeError): - pass - - # If reshape fails or not all elements are extractable, return as object array - try: - return np.array(flat_values, dtype=object).reshape(actual_shape[0:1]) - except (ValueError, TypeError): - return flat_values + result.append(element) + return result def to_array(self) -> np.ndarray: """Convert this slice to a numpy array respecting multi-dimensional structure. From f085f827cec632dd152d5dff3d50cef8f3f48bdd Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Mon, 5 Jan 2026 14:48:47 +0100 Subject: [PATCH 41/44] Simplified __getitem__ method for lazy lading https://github.com/iterorganization/IMAS-Python/pull/20#discussion_r2580868433 --- imas/ids_struct_array.py | 35 +++++++---------------------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/imas/ids_struct_array.py b/imas/ids_struct_array.py index d6b892c..41ed509 100644 --- a/imas/ids_struct_array.py +++ b/imas/ids_struct_array.py @@ -130,37 +130,16 @@ def __getitem__(self, item): A single IDSStructure if item is an int, or an IDSSlice if item is a slice """ if isinstance(item, slice): - if self._lazy: - - self._load(None) # Load size - - # Convert slice to indices - start, stop, step = item.indices(len(self)) - - # Load only the elements in the slice range - loaded_elements = [] - for i in range(start, stop, step): - self._load(i) # Load each element on demand - loaded_elements.append(self.value[i]) - - from imas.ids_slice import IDSSlice - from imas.util import get_full_path - - slice_str = IDSSlice._format_slice(item) - # Build full path: parent path + this array name + slice - full_path = get_full_path(self) + slice_str - - return IDSSlice( - self.metadata, - loaded_elements, - full_path, - parent_array=self, - ) - from imas.ids_slice import IDSSlice from imas.util import get_full_path - matched_elements = self.value[item] + if self._lazy: + # Use __getitem__ for each index to trigger proper lazy loading + matched_elements = [self[i] for i in range(*item.indices(len(self)))] + else: + # Direct slice for non-lazy case + matched_elements = self.value[item] + slice_str = IDSSlice._format_slice(item) # Build full path: parent path + this array name + slice full_path = get_full_path(self) + slice_str From 44a111066e3af73d6157ddd266a340eed2f4fa24 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Mon, 5 Jan 2026 15:02:37 +0100 Subject: [PATCH 42/44] simplified to_array method https://github.com/iterorganization/IMAS-Python/pull/20#discussion_r2580486120 --- imas/ids_slice.py | 234 ++++++++++------------------- imas/test/test_multidim_slicing.py | 20 ++- 2 files changed, 93 insertions(+), 161 deletions(-) diff --git a/imas/ids_slice.py b/imas/ids_slice.py index 4edf5ce..641625f 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -101,8 +101,8 @@ def shape(self) -> Tuple[int, ...]: Raises: ValueError: The underlying data is ragged (non-rectangular). Use - .is_ragged to check first, or use - .to_array() to convert to a numpy object array. + .is_ragged to check first, or use .values() to extract values + as a flat list. Returns: Tuple of dimensions. @@ -110,8 +110,8 @@ def shape(self) -> Tuple[int, ...]: if self.is_ragged: raise ValueError( f"Cannot get shape of ragged array: dimensions have varying sizes. " - f"Use .is_ragged to check if data is ragged, or .to_array() to " - f"convert to numpy object array." + f"Use .is_ragged to check if data is ragged, or .values() to " + f"get a flat list of elements." ) # Build shape from hierarchy, replacing None with actual uniform size @@ -435,170 +435,98 @@ def values(self) -> List[Any]: return result def to_array(self) -> np.ndarray: - """Convert this slice to a numpy array respecting multi-dimensional structure. + """Convert this slice to a numpy array - for leaf node slices only. - For 1D slices: returns a simple 1D array. - For multi-dimensional slices: returns an array with shape self.shape. - For ragged data: returns an object array containing the elements as-is. + This method converts a slice containing scalar or numeric array leaf nodes + to a regular numpy array with shape self.shape. It is designed for + tensorization of leaf nodes only (e.g., slices of FLT_1D, profiles, etc.). - This is useful for integration with numpy operations, scipy functions, - and xarray data structures. The returned array preserves the hierarchical - structure of the IMAS data. + For multi-dimensional access to non-leaf nodes, use direct indexing instead: + ``ids[i1][i2]`` rather than slicing with ``.to_array()``. Returns: - numpy.ndarray with shape self.shape, or object array if ragged. + numpy.ndarray with shape self.shape containing the extracted values. Raises: - ValueError: If array cannot be converted to numpy - """ - from imas.ids_primitive import IDSPrimitive, IDSNumericArray + ValueError: If slice refers to IDSStructure or IDSStructArray elements + (non-leaf nodes). Use direct indexing instead. + ValueError: If the data is ragged/non-rectangular (dimensions have + varying sizes). Use direct indexing or ``.values()`` instead. + ValueError: If values cannot be converted to numpy array. - # Try to get the actual shape (will check if ragged) - try: - actual_shape = self.shape # Will raise if ragged - is_ragged_data = False - except ValueError: - # Data is ragged - handle it gracefully - is_ragged_data = True - actual_shape = None - - # 1D case - simple conversion - if not is_ragged_data and len(actual_shape) == 1: - flat_values = [] - for element in self._matched_elements: - if isinstance(element, IDSPrimitive): - flat_values.append(element.value) - else: - flat_values.append(element) - try: - return np.array(flat_values) - except (ValueError, TypeError): - return np.array(flat_values, dtype=object) - - # Multi-dimensional case - # Check if matched elements are themselves arrays (IDSNumericArray) - if self._matched_elements and isinstance( - self._matched_elements[0], IDSNumericArray - ): - # Elements are numeric arrays - extract their values and stack them - array_values = [] - for element in self._matched_elements: - if isinstance(element, IDSNumericArray): - array_values.append(element.value) - else: - array_values.append(element) - - # For ragged data, return object array with arrays as elements - if is_ragged_data: - return np.array(array_values, dtype=object) - - # Try to stack into proper shape - try: - # Check if all arrays have the same size (regular) - sizes = [] - for val in array_values: - if hasattr(val, "__len__"): - sizes.append(len(val)) - else: - sizes.append(1) - - # If all sizes are the same, we can create a regular array - if len(set(sizes)) == 1: - # Regular array - all sub-arrays same size - stacked = np.array(array_values) - # Should now have shape (first_dim, second_dim) - if stacked.shape == actual_shape: - return stacked - else: - # Try explicit reshape - try: - return stacked.reshape(actual_shape) - except ValueError: - # If reshape fails, return as object array - result_arr = np.empty(actual_shape, dtype=object) - for i, val in enumerate(array_values): - result_arr.flat[i] = val - return result_arr - else: - result_arr = np.empty(actual_shape[0], dtype=object) - for i, val in enumerate(array_values): - result_arr[i] = val - return result_arr - except (ValueError, TypeError): - # Fallback: return object array - result_arr = np.empty(actual_shape[0], dtype=object) - for i, val in enumerate(array_values): - result_arr[i] = val - return result_arr - - # For non-numeric elements in multi-dimensional structure - # Extract and try to build structure - flat_values = [] + Examples: + Tensorize a 1D slice of numeric data:: - # First check if matched_elements are IDSStructArray (which need flattening) - from imas.ids_struct_array import IDSStructArray + # Works: leaf nodes are numeric arrays + array = core_profiles.profiles_1d[:].te.to_array() # Shape: (n_profiles,) - has_struct_arrays = self._matched_elements and isinstance( - self._matched_elements[0], IDSStructArray - ) + Multi-dimensional tensorization:: - if has_struct_arrays: - # Flatten IDSStructArray elements - for struct_array in self._matched_elements: - for element in struct_array: - if isinstance(element, IDSPrimitive): - flat_values.append(element.value) - else: - flat_values.append(element) - else: - # Regular elements - for element in self._matched_elements: - if isinstance(element, IDSPrimitive): - flat_values.append(element.value) - else: - flat_values.append(element) - - # For ragged data, construct object array from hierarchy - if is_ragged_data: - # Build object array respecting the ragged structure - if len(self._element_hierarchy) == 1: - # Simple 1D array - return np.array(flat_values, dtype=object) - else: - # Multi-level hierarchy - reconstruct structure - result_arr = np.empty(self._element_hierarchy[0], dtype=object) - idx = 0 - for i in range(self._element_hierarchy[0]): - group_size = self._element_hierarchy[1][i] - result_arr[i] = flat_values[idx:idx+group_size] - idx += group_size - return result_arr - - total_size = 1 - for dim in actual_shape: - total_size *= dim - - # Check if we have the right number of elements - if len(flat_values) != total_size: + # Works: accessing leaf nodes from nested structure + array = core_profiles.profiles_1d[:].te.to_array() # Shape: (n_profiles,) + + Direct indexing for non-leaf nodes:: + + # Don't do this - will raise ValueError + # array = core_profiles.profiles_1d[:].to_array() # ERROR! + + # Do this instead + profile = core_profiles.profiles_1d[0] # Direct access + te = profile.te.to_array() # Then tensorize + """ + from imas.ids_primitive import IDSPrimitive, IDSNumericArray + from imas.ids_struct_array import IDSStructArray + from imas.ids_structure import IDSStructure + + # Validate: slice must refer to leaf nodes only + if self._matched_elements: + first = self._matched_elements[0] + if isinstance(first, (IDSStructure, IDSStructArray)): + raise ValueError( + f"Cannot tensorize {type(first).__name__} slice - only works for " + f"leaf nodes (scalars, numeric arrays). Use direct indexing instead: " + f"ids[i][j] to access structures." + ) + + # Validate: data must be rectangular (not ragged) + if self.is_ragged: raise ValueError( - f"Cannot convert to array: expected {total_size} elements " - f"but got {len(flat_values)}" + f"Cannot tensorize ragged array - dimensions have varying sizes. " + f"Use .values() to get a flat list, or use direct indexing for " + f"multi-dimensional access." ) - # Try to create the array + # Get the target shape (we validated it's not ragged) + actual_shape = self.shape + + # Handle empty slice + if len(self._matched_elements) == 0: + return np.empty(actual_shape, dtype=float) + + # Extract values from leaf nodes + flat_values = [] + for element in self._matched_elements: + if isinstance(element, IDSPrimitive): + flat_values.append(element.value) + elif isinstance(element, IDSNumericArray): + flat_values.append(element.value) + else: + flat_values.append(element) + + # Tensorize to target shape + arr = np.array(flat_values) + + # For 1D, no reshape needed + if len(actual_shape) == 1: + return arr + + # For multi-dimensional, reshape to target shape try: - arr = np.array(flat_values) - try: - # Try to reshape to target shape - return arr.reshape(actual_shape) - except (ValueError, TypeError): - # If reshape fails, use object array - arr_obj = np.empty(actual_shape, dtype=object) - for i, val in enumerate(flat_values): - arr_obj.flat[i] = val - return arr_obj + return arr.reshape(actual_shape) except (ValueError, TypeError) as e: - raise ValueError(f"Failed to convert slice to numpy array: {e}") + raise ValueError( + f"Failed to convert slice to array with shape {actual_shape}: {e}" + ) @staticmethod def _format_slice(slice_obj: slice) -> str: diff --git a/imas/test/test_multidim_slicing.py b/imas/test/test_multidim_slicing.py index 43925c2..70783c5 100644 --- a/imas/test/test_multidim_slicing.py +++ b/imas/test/test_multidim_slicing.py @@ -77,7 +77,7 @@ def test_to_array_3d_regular(self): assert np.allclose(array[0, 1, :], [0.0, 1.0]) def test_to_array_variable_size(self): - """Test to_array() with variable-size arrays.""" + """Test to_array() raises error for ragged arrays.""" cp = IDSFactory("3.39.0").core_profiles() cp.profiles_1d.resize(3) cp.profiles_1d[0].grid.rho_tor_norm = np.array([0.0, 0.5, 1.0]) @@ -85,13 +85,17 @@ def test_to_array_variable_size(self): cp.profiles_1d[2].grid.rho_tor_norm = np.array([0.0, 0.5, 1.0]) result = cp.profiles_1d[:].grid.rho_tor_norm - array = result.to_array() - - assert array.dtype == object - assert len(array) == 3 - assert len(array[0]) == 3 - assert len(array[1]) == 5 - assert len(array[2]) == 3 + + # to_array() should raise ValueError for ragged data + with pytest.raises(ValueError, match="Cannot tensorize ragged array"): + result.to_array() + + # But .values() should still work + values = result.values() + assert len(values) == 3 + assert len(values[0]) == 3 + assert len(values[1]) == 5 + assert len(values[2]) == 3 def test_enhanced_values_2d(self): """Test enhanced values() method for 2D extraction.""" From a3ca48c8f94f06a5e3ada875e8b7853618b31d6c Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Mon, 5 Jan 2026 16:00:24 +0100 Subject: [PATCH 43/44] fixed __getitem method principle of least astonishment, https://github.com/iterorganization/IMAS-Python/pull/20#pullrequestreview-3524037332 --- imas/ids_slice.py | 237 ++++++++++++++++------------- imas/test/test_multidim_slicing.py | 54 ++++--- 2 files changed, 172 insertions(+), 119 deletions(-) diff --git a/imas/ids_slice.py b/imas/ids_slice.py index 641625f..d54f10d 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -114,12 +114,18 @@ def shape(self) -> Tuple[int, ...]: f"get a flat list of elements." ) - # Build shape from hierarchy, replacing None with actual uniform size + # Build shape from hierarchy shape = [] - for hierarchy_level in self._element_hierarchy: + for i, hierarchy_level in enumerate(self._element_hierarchy): if isinstance(hierarchy_level, list): - # This is a list of sizes - get the uniform size (we checked is_ragged) - shape.append(hierarchy_level[0]) + # This is a list of sizes + if i == 0: + # First level with a list means grouped data + # The number of groups is the first hierarchy level (implicit) + shape.append(len(hierarchy_level)) + else: + # Subsequent levels: use first size (uniform, we checked is_ragged) + shape.append(hierarchy_level[0] if hierarchy_level else 0) else: # This is a single count shape.append(hierarchy_level) @@ -135,106 +141,62 @@ def __iter__(self) -> Iterator[Any]: return iter(self._matched_elements) def __getitem__(self, item: Union[int, slice]) -> "IDSSlice": - """Get element(s) from the slice. + """Get element(s) from the slice using slice notation. - When the matched elements are IDSStructArray objects, the indexing - operation is applied to each array element (array-wise indexing). - Otherwise, the operation is applied to the matched elements list itself. + Only slice operations are supported. Integer indexing on IDSSlice + is not allowed to avoid confusion with array-wise operations. + Use direct indexing on the IDS structure instead. Args: - item: Index or slice to apply + item: Slice object to apply Returns: - - IDSSlice: If item is a slice, or if applying integer index to - IDSStructArray elements - - Single element: If item is an int and elements are not IDSStructArray - """ - from imas.ids_struct_array import IDSStructArray - - # Check if we have array-wise indexing (elements are IDSStructArray) - if self._matched_elements and isinstance( - self._matched_elements[0], IDSStructArray - ): - if isinstance(item, slice): - return self._handle_array_wise_slice(item) - else: - return self._handle_array_wise_integer(item) - else: - if isinstance(item, slice): - return self._handle_list_slice(item) - else: - return self._matched_elements[int(item)] + IDSSlice: A new slice with the applied slice operation - def _handle_array_wise_slice(self, item: slice) -> "IDSSlice": - """Apply a slice operation array-wise to IDSStructArray elements. - - Applies the slice to each array element and preserves the grouping - structure for multi-dimensional shapes. - - Args: - item: The slice object to apply + Raises: + TypeError: If item is an integer (not supported) - Returns: - IDSSlice with updated shape and hierarchy - """ - sliced_elements = [] - sliced_sizes = [] - - for array in self._matched_elements: - sliced = array[item] - if isinstance(sliced, IDSSlice): - sliced_elements.extend(sliced._matched_elements) - sliced_sizes.append(len(sliced)) - else: - sliced_elements.append(sliced) - sliced_sizes.append(1) + Examples: + Slice operations (supported):: - slice_str = self._format_slice(item) - # Full path: current path + slice operation - full_path = self._path + slice_str + # Get ions 0 through 2 from all profiles + result = cp.profiles_1d[:].ion[:3] # OK - returns IDSSlice + result = cp.profiles_1d[:].ion[1:3] # OK - returns IDSSlice + result = cp.profiles_1d[:].ion[::2] # OK - returns IDSSlice - # Update shape to reflect the sliced structure - # Keep first dimensions, store actual sizes (may be ragged) - new_virtual_shape = self._virtual_shape[:-1] + (None,) - new_hierarchy = self._element_hierarchy[:-1] + [sliced_sizes] + Integer indexing (NOT supported):: - return IDSSlice( - self.metadata, - sliced_elements, - full_path, - parent_array=self._parent_array, - virtual_shape=new_virtual_shape, - element_hierarchy=new_hierarchy, - ) + # These will raise TypeError + result = cp.profiles_1d[:].ion[0] # ERROR! - def _handle_array_wise_integer(self, item: int) -> "IDSSlice": - """Apply integer indexing array-wise to IDSStructArray elements. + Recommended alternatives to integer indexing:: - Applies the integer index to each array element, reducing the last - dimension to size 1. + # Option 1: Direct indexing (best - most efficient, clearest) + result = cp.profiles_1d[0].ion[:] - Args: - item: The integer index to apply + # Option 2: Convert slice to list first + ions_list = list(cp.profiles_1d[:].ion) + result = ions_list[0] - Returns: - IDSSlice with updated shape + # Option 3: Extract values + ions_values = cp.profiles_1d[:].ion.values() + result = ions_values[0] """ - indexed_elements = [array[int(item)] for array in self._matched_elements] - - # Full path: current path + index operation - full_path = self._path + f"[{item}]" - - # Shape changes: last dimension becomes 1 - new_virtual_shape = self._virtual_shape[:-1] + (1,) - - return IDSSlice( - self.metadata, - indexed_elements, - full_path, - parent_array=self._parent_array, - virtual_shape=new_virtual_shape, - element_hierarchy=self._element_hierarchy, - ) + if isinstance(item, slice): + return self._handle_list_slice(item) + else: + # Integer indexing not allowed + raise TypeError( + f"Cannot index IDSSlice with integer {item}. " + f"IDSSlice only supports slice notation (e.g., [0:5], [::2]).\n\n" + f"To access elements, use one of these alternatives:\n" + f" 1. Direct indexing (recommended):\n" + f" ids[{item}].node # Access element directly\n" + f" 2. Convert to list first:\n" + f" list(ids)[{item}] # Convert slice to list\n" + f" 3. Extract values:\n" + f" ids.values()[{item}] # Get values as flat list" + ) def _handle_list_slice(self, item: slice) -> "IDSSlice": """Apply a slice operation to the matched elements list. @@ -248,23 +210,66 @@ def _handle_list_slice(self, item: slice) -> "IDSSlice": Returns: IDSSlice with updated shape and hierarchy """ - sliced_elements = self._matched_elements[item] + from imas.ids_struct_array import IDSStructArray + slice_str = self._format_slice(item) # Full path: current path + slice operation full_path = self._path + slice_str - # Update shape to reflect the slice on first dimension - new_virtual_shape = (len(sliced_elements),) + self._virtual_shape[1:] - new_element_hierarchy = [len(sliced_elements)] + self._element_hierarchy[1:] + # Check if matched elements are IDSStructArray (nested arrays) + if self._matched_elements and isinstance(self._matched_elements[0], IDSStructArray): + # When slicing nested arrays, apply slice to each array and then flatten + flattened_elements = [] + new_hierarchy_values = [] + for array in self._matched_elements: + sliced_array = array[item] + new_hierarchy_values.append(len(sliced_array)) + # Flatten: add each element from the sliced array to flattened list + for element in sliced_array: + flattened_elements.append(element) + + # Build new hierarchy + # The key is: if we have a multi-level grouped hierarchy (like [3, [2, 2, 2], ...]), + # we're dealing with a nested structure that's already been flattened. + # We should only update the innermost level, NOT create a new top-level grouping. + + num_groups = len(self._matched_elements) + + if (len(self._element_hierarchy) >= 2 and + isinstance(self._element_hierarchy[0], int) and + isinstance(self._element_hierarchy[1], list)): + # Multi-level hierarchy like [3, [2, 2, 2], ...] + # The top level is the original grouping, so DON'T recreate it + # Just replace the last (innermost) level + new_hierarchy = self._element_hierarchy[:-1] + [new_hierarchy_values] + else: + # Single level or not grouped yet - create new grouping + new_hierarchy = [num_groups, new_hierarchy_values] + + return IDSSlice( + self.metadata, + flattened_elements, + full_path, + parent_array=self._parent_array, + virtual_shape=(len(flattened_elements),), + element_hierarchy=new_hierarchy, + ) + else: + # Normal slice on outer list + sliced_elements = self._matched_elements[item] + + # Update shape to reflect the slice on first dimension + new_virtual_shape = (len(sliced_elements),) + self._virtual_shape[1:] + new_element_hierarchy = [len(sliced_elements)] + self._element_hierarchy[1:] - return IDSSlice( - self.metadata, - sliced_elements, - full_path, - parent_array=self._parent_array, - virtual_shape=new_virtual_shape, - element_hierarchy=new_element_hierarchy, - ) + return IDSSlice( + self.metadata, + sliced_elements, + full_path, + parent_array=self._parent_array, + virtual_shape=new_virtual_shape, + element_hierarchy=new_element_hierarchy, + ) def __getattr__(self, name: str) -> "IDSSlice": """Access a child node on all matched elements. @@ -310,7 +315,13 @@ def __getattr__(self, name: str) -> "IDSSlice": ) # Get attributes from all non-empty matched elements - child_elements = [getattr(element, name) for element in self] + # Special case: if matched_elements are IDSStructArray, keep them grouped + if self._matched_elements and isinstance(self._matched_elements[0], IDSStructArray): + # For nested arrays, return the arrays themselves, not attributes from them + # This allows chaining like .ion[:].element[:] to work + child_elements = self._matched_elements + else: + child_elements = [getattr(element, name) for element in self] # Check if children are IDSStructArray (nested arrays) or IDSNumericArray if not child_elements: @@ -324,6 +335,28 @@ def __getattr__(self, name: str) -> "IDSSlice": element_hierarchy=self._element_hierarchy, ) + # If matched_elements are IDSStructArray and we're accessing an attribute on them, + # we need to get that attribute from each array's elements + if isinstance(self._matched_elements[0], IDSStructArray): + # Accessing attribute on nested arrays: need to get attr from each array's elements + flattened_elements = [] + for array in child_elements: + # array is IDSStructArray, get the attribute from its elements + for element in array: + flattened_elements.append(getattr(element, name)) + + # Keep track of grouping for shape preservation + child_sizes = [len(array) for array in child_elements] + + return IDSSlice( + child_metadata, + flattened_elements, + full_path, + parent_array=self._parent_array, + virtual_shape=self._virtual_shape + (None,), + element_hierarchy=self._element_hierarchy + [child_sizes], + ) + if isinstance(child_elements[0], IDSStructArray): # Children are IDSStructArray - track the new dimension child_sizes = [len(arr) for arr in child_elements] diff --git a/imas/test/test_multidim_slicing.py b/imas/test/test_multidim_slicing.py index 70783c5..34bb045 100644 --- a/imas/test/test_multidim_slicing.py +++ b/imas/test/test_multidim_slicing.py @@ -145,8 +145,8 @@ def test_slice_preserves_groups(self): assert result.shape == (10, 3) assert len(result) == 30 # Flattened for iteration, but shape preserved - def test_integer_index_on_nested(self): - """Test integer indexing on nested structures.""" + def test_integer_index_not_supported(self): + """Test that integer indexing on IDSSlice is not supported.""" cp = IDSFactory("3.39.0").core_profiles() cp.profiles_1d.resize(5) for i, p in enumerate(cp.profiles_1d): @@ -154,12 +154,24 @@ def test_integer_index_on_nested(self): for j, ion in enumerate(p.ion): ion.label = f"ion_{i}_{j}" - # Get first ion from all profiles - result = cp.profiles_1d[:].ion[0] + # Integer indexing on IDSSlice should raise TypeError + with pytest.raises(TypeError, match="Cannot index IDSSlice with integer"): + cp.profiles_1d[:].ion[0] - assert len(result) == 5 - for i, ion in enumerate(result): - assert ion.label == f"ion_{i}_0" + # Show the correct alternatives + # Option 1: Direct indexing (recommended) + ion_0_from_first_profile = cp.profiles_1d[0].ion[:1] # Use slice, not int index + assert len(ion_0_from_first_profile) == 1 + + # Option 2: Convert to list + ions_list = list(cp.profiles_1d[:].ion) + ions_from_first_profile = ions_list[0] + assert len(ions_from_first_profile) == 2 + + # Option 3: Extract values + ions_values = cp.profiles_1d[:].ion.values() + first_profile_ions = ions_values[0] + assert len(first_profile_ions) == 2 def test_slice_on_nested_arrays(self): """Test slicing on nested arrays.""" @@ -187,8 +199,8 @@ def test_step_slicing_on_nested(self): assert result.shape == (5, 3) # 5 profiles, 3 ions each (0, 2, 4) assert len(result) == 15 - def test_negative_indexing_on_nested(self): - """Test negative indexing on nested structures.""" + def test_negative_indexing_not_supported(self): + """Test that negative integer indexing on IDSSlice is not supported.""" cp = IDSFactory("3.39.0").core_profiles() cp.profiles_1d.resize(5) for p in cp.profiles_1d: @@ -196,12 +208,19 @@ def test_negative_indexing_on_nested(self): for j, ion in enumerate(p.ion): ion.label = f"ion_{j}" - # Get last ion from each profile - result = cp.profiles_1d[:].ion[-1] + # Negative integer indexing on IDSSlice should raise TypeError + with pytest.raises(TypeError, match="Cannot index IDSSlice with integer"): + cp.profiles_1d[:].ion[-1] - assert len(result) == 5 - for ion in result: - assert ion.label == "ion_2" + # Show the correct alternative: use slice instead + # Get last ion from each profile using slice + result = cp.profiles_1d[:].ion[2:3] # Get last element with slice + assert result.shape == (5, 1) + + # Or better: direct indexing + last_ions = [p.ion[-1] for p in cp.profiles_1d] + assert len(last_ions) == 5 + assert all(ion.label == "ion_2" for ion in last_ions) def test_to_array_grouped_structure(self): """Test that to_array preserves grouped structure.""" @@ -327,7 +346,7 @@ def test_single_element_2d(self): assert result.shape == (1, 3) def test_single_dimension_value(self): - """Test accessing a single value in multi-dimensional structure.""" + """Test accessing scalar values from nested structures.""" cp = IDSFactory("3.39.0").core_profiles() cp.profiles_1d.resize(3) for p in cp.profiles_1d: @@ -335,9 +354,10 @@ def test_single_dimension_value(self): for i in p.ion: i.z_ion = 1.0 - result = cp.profiles_1d[:].ion[0].z_ion + # Use slice notation instead of integer indexing + result = cp.profiles_1d[:].ion[:1].z_ion # Get first ion only - # Should be 3 items (one per profile) + # Should be 3 items (one per profile, one ion per profile) assert len(result) == 3 def test_slice_of_slice(self): From 9116f194fea2fc8f05b33375f3b27b48600ec228 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Mon, 5 Jan 2026 16:26:49 +0100 Subject: [PATCH 44/44] fixed formatting --- imas/ids_slice.py | 100 ++++++++++++++++------------- imas/test/test_multidim_slicing.py | 10 +-- 2 files changed, 62 insertions(+), 48 deletions(-) diff --git a/imas/ids_slice.py b/imas/ids_slice.py index d54f10d..2aa8b47 100644 --- a/imas/ids_slice.py +++ b/imas/ids_slice.py @@ -100,20 +100,20 @@ def shape(self) -> Tuple[int, ...]: array, based on the hierarchy of slicing operations performed. Raises: - ValueError: The underlying data is ragged (non-rectangular). Use - .is_ragged to check first, or use .values() to extract values - as a flat list. + ValueError: The underlying data is ragged (non-rectangular). + Use .is_ragged to check first, or use .values() to extract + values as a flat list. Returns: Tuple of dimensions. """ if self.is_ragged: raise ValueError( - f"Cannot get shape of ragged array: dimensions have varying sizes. " - f"Use .is_ragged to check if data is ragged, or .values() to " - f"get a flat list of elements." + "Cannot get shape of ragged array: dimensions have varying " + "sizes. Use .is_ragged to check if data is ragged, or .values() " + "to get a flat list of elements." ) - + # Build shape from hierarchy shape = [] for i, hierarchy_level in enumerate(self._element_hierarchy): @@ -129,7 +129,7 @@ def shape(self) -> Tuple[int, ...]: else: # This is a single count shape.append(hierarchy_level) - + return tuple(shape) def __len__(self) -> int: @@ -211,13 +211,15 @@ def _handle_list_slice(self, item: slice) -> "IDSSlice": IDSSlice with updated shape and hierarchy """ from imas.ids_struct_array import IDSStructArray - + slice_str = self._format_slice(item) # Full path: current path + slice operation full_path = self._path + slice_str # Check if matched elements are IDSStructArray (nested arrays) - if self._matched_elements and isinstance(self._matched_elements[0], IDSStructArray): + if self._matched_elements and isinstance( + self._matched_elements[0], IDSStructArray + ): # When slicing nested arrays, apply slice to each array and then flatten flattened_elements = [] new_hierarchy_values = [] @@ -227,17 +229,20 @@ def _handle_list_slice(self, item: slice) -> "IDSSlice": # Flatten: add each element from the sliced array to flattened list for element in sliced_array: flattened_elements.append(element) - + # Build new hierarchy - # The key is: if we have a multi-level grouped hierarchy (like [3, [2, 2, 2], ...]), - # we're dealing with a nested structure that's already been flattened. - # We should only update the innermost level, NOT create a new top-level grouping. - + # The key is: if we have a multi-level grouped hierarchy + # (like [3, [2, 2, 2], ...]), we're dealing with a nested + # structure that's already been flattened. We should only update + # the innermost level, NOT create a new top-level grouping. + num_groups = len(self._matched_elements) - - if (len(self._element_hierarchy) >= 2 and - isinstance(self._element_hierarchy[0], int) and - isinstance(self._element_hierarchy[1], list)): + + if ( + len(self._element_hierarchy) >= 2 + and isinstance(self._element_hierarchy[0], int) + and isinstance(self._element_hierarchy[1], list) + ): # Multi-level hierarchy like [3, [2, 2, 2], ...] # The top level is the original grouping, so DON'T recreate it # Just replace the last (innermost) level @@ -245,7 +250,7 @@ def _handle_list_slice(self, item: slice) -> "IDSSlice": else: # Single level or not grouped yet - create new grouping new_hierarchy = [num_groups, new_hierarchy_values] - + return IDSSlice( self.metadata, flattened_elements, @@ -257,7 +262,7 @@ def _handle_list_slice(self, item: slice) -> "IDSSlice": else: # Normal slice on outer list sliced_elements = self._matched_elements[item] - + # Update shape to reflect the slice on first dimension new_virtual_shape = (len(sliced_elements),) + self._virtual_shape[1:] new_element_hierarchy = [len(sliced_elements)] + self._element_hierarchy[1:] @@ -316,7 +321,9 @@ def __getattr__(self, name: str) -> "IDSSlice": # Get attributes from all non-empty matched elements # Special case: if matched_elements are IDSStructArray, keep them grouped - if self._matched_elements and isinstance(self._matched_elements[0], IDSStructArray): + if self._matched_elements and isinstance( + self._matched_elements[0], IDSStructArray + ): # For nested arrays, return the arrays themselves, not attributes from them # This allows chaining like .ion[:].element[:] to work child_elements = self._matched_elements @@ -335,19 +342,21 @@ def __getattr__(self, name: str) -> "IDSSlice": element_hierarchy=self._element_hierarchy, ) - # If matched_elements are IDSStructArray and we're accessing an attribute on them, - # we need to get that attribute from each array's elements + # If matched_elements are IDSStructArray and we're accessing an + # attribute on them, we need to get that attribute from each + # array's elements if isinstance(self._matched_elements[0], IDSStructArray): - # Accessing attribute on nested arrays: need to get attr from each array's elements + # Accessing attribute on nested arrays: get attr from each + # array's elements flattened_elements = [] for array in child_elements: - # array is IDSStructArray, get the attribute from its elements + # array is IDSStructArray, get attribute from its elements for element in array: flattened_elements.append(getattr(element, name)) - + # Keep track of grouping for shape preservation child_sizes = [len(array) for array in child_elements] - + return IDSSlice( child_metadata, flattened_elements, @@ -417,7 +426,10 @@ def __repr__(self) -> str: """ ids_name = self.metadata.ids_name item_word = "item" if len(self) == 1 else "items" - return f"<{type(self).__name__} (IDS:{ids_name}, {self._path} with {len(self)} {item_word})>" + return ( + f"<{type(self).__name__} (IDS:{ids_name}, {self._path} with " + f"{len(self)} {item_word})>" + ) def values(self) -> List[Any]: """Extract raw values from elements in this slice. @@ -425,13 +437,13 @@ def values(self) -> List[Any]: For IDSPrimitive elements, this extracts the wrapped value. For other element types, returns them as-is. - Returns a flat list of extracted values. This is useful for getting - the actual data without the IDS wrapper when accessing scalar fields - through a slice, without requiring explicit looping through the + Returns a flat list of extracted values. This is useful for getting + the actual data without the IDS wrapper when accessing scalar fields + through a slice, without requiring explicit looping through the original collection. For multi-dimensional access to values: - - Use direct indexing: ``ids_obj[i1].collection[i2].value`` for best + - Use direct indexing: ``ids_obj[i1].collection[i2].value`` for best performance and clarity - Use ``.to_array()`` if you need numpy array integration @@ -491,12 +503,14 @@ def to_array(self) -> np.ndarray: Tensorize a 1D slice of numeric data:: # Works: leaf nodes are numeric arrays - array = core_profiles.profiles_1d[:].te.to_array() # Shape: (n_profiles,) + array = core_profiles.profiles_1d[:].te.to_array() + # Shape: (n_profiles,) Multi-dimensional tensorization:: # Works: accessing leaf nodes from nested structure - array = core_profiles.profiles_1d[:].te.to_array() # Shape: (n_profiles,) + array = core_profiles.profiles_1d[:].te.to_array() + # Shape: (n_profiles,) Direct indexing for non-leaf nodes:: @@ -516,17 +530,17 @@ def to_array(self) -> np.ndarray: first = self._matched_elements[0] if isinstance(first, (IDSStructure, IDSStructArray)): raise ValueError( - f"Cannot tensorize {type(first).__name__} slice - only works for " - f"leaf nodes (scalars, numeric arrays). Use direct indexing instead: " - f"ids[i][j] to access structures." + f"Cannot tensorize {type(first).__name__} slice - only " + f"works for leaf nodes (scalars, numeric arrays). Use " + f"direct indexing instead: ids[i][j] to access structures." ) # Validate: data must be rectangular (not ragged) if self.is_ragged: raise ValueError( - f"Cannot tensorize ragged array - dimensions have varying sizes. " - f"Use .values() to get a flat list, or use direct indexing for " - f"multi-dimensional access." + "Cannot tensorize ragged array - dimensions have varying " + "sizes. Use .values() to get a flat list, or use direct " + "indexing for multi-dimensional access." ) # Get the target shape (we validated it's not ragged) @@ -548,11 +562,11 @@ def to_array(self) -> np.ndarray: # Tensorize to target shape arr = np.array(flat_values) - + # For 1D, no reshape needed if len(actual_shape) == 1: return arr - + # For multi-dimensional, reshape to target shape try: return arr.reshape(actual_shape) diff --git a/imas/test/test_multidim_slicing.py b/imas/test/test_multidim_slicing.py index 34bb045..0611489 100644 --- a/imas/test/test_multidim_slicing.py +++ b/imas/test/test_multidim_slicing.py @@ -85,11 +85,11 @@ def test_to_array_variable_size(self): cp.profiles_1d[2].grid.rho_tor_norm = np.array([0.0, 0.5, 1.0]) result = cp.profiles_1d[:].grid.rho_tor_norm - + # to_array() should raise ValueError for ragged data with pytest.raises(ValueError, match="Cannot tensorize ragged array"): result.to_array() - + # But .values() should still work values = result.values() assert len(values) == 3 @@ -162,12 +162,12 @@ def test_integer_index_not_supported(self): # Option 1: Direct indexing (recommended) ion_0_from_first_profile = cp.profiles_1d[0].ion[:1] # Use slice, not int index assert len(ion_0_from_first_profile) == 1 - + # Option 2: Convert to list ions_list = list(cp.profiles_1d[:].ion) ions_from_first_profile = ions_list[0] assert len(ions_from_first_profile) == 2 - + # Option 3: Extract values ions_values = cp.profiles_1d[:].ion.values() first_profile_ions = ions_values[0] @@ -216,7 +216,7 @@ def test_negative_indexing_not_supported(self): # Get last ion from each profile using slice result = cp.profiles_1d[:].ion[2:3] # Get last element with slice assert result.shape == (5, 1) - + # Or better: direct indexing last_ions = [p.ion[-1] for p in cp.profiles_1d] assert len(last_ions) == 5