diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29b..66b076a7 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: patch + changes: + fixed: + - Fixed pandas 3.0 compatibility in ParameterNodeAtInstant.__getitem__() by converting a pandas StringArray to a numpy array before using it for fancy indexing (fixes #429) diff --git a/policyengine_core/parameters/parameter_node_at_instant.py b/policyengine_core/parameters/parameter_node_at_instant.py index 67f4695e..f3d8c6ef 100644 --- a/policyengine_core/parameters/parameter_node_at_instant.py +++ b/policyengine_core/parameters/parameter_node_at_instant.py @@ -54,6 +54,10 @@ def __getitem__( self, key: str ) -> Union["ParameterNodeAtInstant", VectorialParameterNodeAtInstant]: # If fancy indexing is used, cast to a vectorial node + # Convert pandas arrays (e.g., StringArray from pandas 3) to numpy + # before checking, since StringArray has __array__ but is not hashable + if hasattr(key, "__array__") and not isinstance(key, numpy.ndarray): + key = numpy.asarray(key) if isinstance(key, numpy.ndarray): return parameters.VectorialParameterNodeAtInstant.build_from_node( self diff --git a/tests/core/test_pandas3_compatibility.py b/tests/core/test_pandas3_compatibility.py index 1cce2978..38a02f84 100644 --- a/tests/core/test_pandas3_compatibility.py +++ b/tests/core/test_pandas3_compatibility.py @@ -93,6 +93,61 @@ def test_parameter_node_getitem_with_string_array(self): numpy_array = np.asarray(string_array) assert isinstance(numpy_array, np.ndarray) + def test_parameter_node_at_instant_getitem_with_string_array(self): + """ + Regression test for pandas 3.0 StringArray compatibility. + + ParameterNodeAtInstant.__getitem__ should handle pandas StringArray + by converting it to numpy array before using it for fancy indexing. + Without the fix, this raises: TypeError: unhashable type: 'StringArray' + + This is issue #429. 
+ """ + from policyengine_core.parameters import ParameterNode + import tempfile + import os + import yaml + + # Create a minimal parameter tree for testing + with tempfile.TemporaryDirectory() as tmpdir: + # Create parameter YAML files + param_dir = os.path.join(tmpdir, "test_params") + os.makedirs(param_dir) + + # Create zone parameters + for zone in ["zone_1", "zone_2"]: + zone_file = os.path.join(param_dir, f"{zone}.yaml") + with open(zone_file, "w") as f: + yaml.dump( + { + "values": { + "2024-01-01": { + "value": 1.0 if zone == "zone_1" else 2.0 + } + }, + "metadata": {"unit": "currency-GBP"}, + }, + f, + ) + + # Load the parameter node + node = ParameterNode(directory_path=param_dir) + node_at_instant = node("2024-01-01") + + # Test with numpy array - should work + key_numpy = np.array(["zone_1", "zone_2"]) + result_numpy = node_at_instant[key_numpy] + assert len(result_numpy) == 2 + + # Test with pandas StringArray - this was failing before the fix + # TypeError: unhashable type: 'StringArray' + key_string_array = pd.array(["zone_1", "zone_2"], dtype="string") + result_string_array = node_at_instant[key_string_array] + assert len(result_string_array) == 2 + + # Results should be the same + np.testing.assert_array_equal(result_numpy, result_string_array) + def test_vectorial_parameter_node_with_string_array(self): """ VectorialParameterNodeAtInstant.__getitem__ should handle pandas