From 68ad5ecf0db8f1390469d01730e55e2b550d55be Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sun, 25 Jan 2026 08:49:32 -0500 Subject: [PATCH 1/4] Fix pandas 3.0 StringArray compatibility in ParameterNodeAtInstant ParameterNodeAtInstant.__getitem__() now converts pandas StringArray to numpy array before checking for fancy indexing. Without this fix, passing a StringArray as a key raises: TypeError: unhashable type: 'StringArray' This mirrors the existing fix in VectorialParameterNodeAtInstant. Fixes #429 Co-Authored-By: Claude Opus 4.5 --- changelog_entry.yaml | 4 ++ .../parameters/parameter_node_at_instant.py | 4 ++ tests/core/test_pandas3_compatibility.py | 51 +++++++++++++++++++ 3 files changed, 59 insertions(+) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29bb..66b076a77 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: patch + changes: + fixed: + - Fixed pandas 3.0 compatibility in ParameterNodeAtInstant.__getitem__() by converting pandas StringArray to numpy array before using for fancy indexing (fixes #429) diff --git a/policyengine_core/parameters/parameter_node_at_instant.py b/policyengine_core/parameters/parameter_node_at_instant.py index 67f4695eb..f3d8c6ef8 100644 --- a/policyengine_core/parameters/parameter_node_at_instant.py +++ b/policyengine_core/parameters/parameter_node_at_instant.py @@ -54,6 +54,10 @@ def __getitem__( self, key: str ) -> Union["ParameterNodeAtInstant", VectorialParameterNodeAtInstant]: # If fancy indexing is used, cast to a vectorial node + # Convert pandas arrays (e.g., StringArray from pandas 3) to numpy + # before checking, since StringArray has __array__ but is not hashable + if hasattr(key, "__array__") and not isinstance(key, numpy.ndarray): + key = numpy.asarray(key) if isinstance(key, numpy.ndarray): return parameters.VectorialParameterNodeAtInstant.build_from_node( self diff --git a/tests/core/test_pandas3_compatibility.py b/tests/core/test_pandas3_compatibility.py index 1cce29781..d6281c45b 100644 --- a/tests/core/test_pandas3_compatibility.py +++ b/tests/core/test_pandas3_compatibility.py @@ -93,6 +93,57 @@ def test_parameter_node_getitem_with_string_array(self): numpy_array = np.asarray(string_array) assert isinstance(numpy_array, np.ndarray) + def test_parameter_node_at_instant_getitem_with_string_array(self): + """ + Regression test for pandas 3.0 StringArray compatibility. + + ParameterNodeAtInstant.__getitem__ should handle pandas StringArray + by converting it to numpy array before using it for fancy indexing. + Without the fix, this raises: TypeError: unhashable type: 'StringArray' + + This is issue #429. + """ + from policyengine_core.parameters import ParameterNode + import tempfile + import os + import yaml + + # Create a minimal parameter tree for testing + with tempfile.TemporaryDirectory() as tmpdir: + # Create parameter YAML files + param_dir = os.path.join(tmpdir, "test_params") + os.makedirs(param_dir) + + # Create zone parameters + for zone in ["zone_1", "zone_2"]: + zone_file = os.path.join(param_dir, f"{zone}.yaml") + with open(zone_file, "w") as f: + yaml.dump( + { + "values": {"2024-01-01": {"value": 1.0 if zone == "zone_1" else 2.0}}, + "metadata": {"unit": "currency-GBP"}, + }, + f, + ) + + # Load the parameter node + node = ParameterNode(directory_path=param_dir) + node_at_instant = node("2024-01-01") + + # Test with numpy array - should work + key_numpy = np.array(["zone_1", "zone_2"]) + result_numpy = node_at_instant[key_numpy] + assert len(result_numpy) == 2 + + # Test with pandas StringArray - this was failing before the fix + # TypeError: unhashable type: 'StringArray' + key_string_array = pd.array(["zone_1", "zone_2"], dtype="string") + result_string_array = node_at_instant[key_string_array] + assert len(result_string_array) == 2 + + # Results should be the same + np.testing.assert_array_equal(result_numpy, result_string_array) + def test_vectorial_parameter_node_with_string_array(self): """ VectorialParameterNodeAtInstant.__getitem__ should handle pandas From 6bc671719089486452cb3a63e190c43b4dcc50f4 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sun, 25 Jan 2026 08:50:42 -0500 Subject: [PATCH 2/4] Fix black formatting Co-Authored-By: Claude Opus 4.5 --- policyengine_core/populations/population.py | 8 ++++++-- tests/core/test_pandas3_compatibility.py | 6 +++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/policyengine_core/populations/population.py b/policyengine_core/populations/population.py index 988485ec0..c2d9d0f4d 100644 --- a/policyengine_core/populations/population.py +++ b/policyengine_core/populations/population.py @@ -78,13 +78,17 @@ def check_period_validity( if period is None: stack = traceback.extract_stack() filename, line_number, function_name, line_of_code = stack[-3] - raise ValueError(""" + raise ValueError( + """ You requested computation of variable "{}", but you did not specify on which period in "{}:{}": {} When you request the computation of a variable within a formula, you must always specify the period as the second parameter. The convention is to call this parameter "period". For example: computed_salary = person('salary', period). See more information at . -""".format(variable_name, filename, line_number, line_of_code)) +""".format( + variable_name, filename, line_number, line_of_code + ) + ) def __call__( self, diff --git a/tests/core/test_pandas3_compatibility.py b/tests/core/test_pandas3_compatibility.py index d6281c45b..38a02f84c 100644 --- a/tests/core/test_pandas3_compatibility.py +++ b/tests/core/test_pandas3_compatibility.py @@ -120,7 +120,11 @@ def test_parameter_node_at_instant_getitem_with_string_array(self): with open(zone_file, "w") as f: yaml.dump( { - "values": {"2024-01-01": {"value": 1.0 if zone == "zone_1" else 2.0}}, + "values": { + "2024-01-01": { + "value": 1.0 if zone == "zone_1" else 2.0 + } + }, "metadata": {"unit": "currency-GBP"}, }, f, From 2ce64ef7c71f5fe5c91ea21039003eb773f0205c Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sun, 25 Jan 2026 08:55:22 -0500 Subject: [PATCH 3/4] Trigger CI Co-Authored-By: Claude Opus 4.5 From d1fc89449c5d9223db0a03e9e9b7559975af605a Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sun, 25 Jan 2026 08:58:44 -0500 Subject: [PATCH 4/4] Fix black 26.1.0 formatting Co-Authored-By: Claude Opus 4.5 --- policyengine_core/populations/population.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/policyengine_core/populations/population.py b/policyengine_core/populations/population.py index c2d9d0f4d..988485ec0 100644 --- a/policyengine_core/populations/population.py +++ b/policyengine_core/populations/population.py @@ -78,17 +78,13 @@ def check_period_validity( if period is None: stack = traceback.extract_stack() filename, line_number, function_name, line_of_code = stack[-3] - raise ValueError( - """ + raise ValueError(""" You requested computation of variable "{}", but you did not specify on which period in "{}:{}": {} When you request the computation of a variable within a formula, you must always specify the period as the second parameter. The convention is to call this parameter "period". For example: computed_salary = person('salary', period). See more information at . -""".format( - variable_name, filename, line_number, line_of_code - ) - ) +""".format(variable_name, filename, line_number, line_of_code)) def __call__( self,