From bdd0e8bb82c586fd110087d05a805efab8e4fc71 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sat, 24 Jan 2026 10:18:14 -0500 Subject: [PATCH 1/5] Add pandas 3.0 compatibility fixes Fixes two issues that occur with pandas 3.0's new defaults: 1. filled_array() now handles pandas ExtensionDtype (StringDtype) - numpy.full() cannot handle StringDtype, so convert to object dtype - Fixes: TypeError: Cannot interpret '' as a data type 2. VectorialParameterNodeAtInstant.__getitem__ now handles StringArray - pandas 3 returns StringArray instead of numpy array for string operations - Convert pandas arrays to numpy before processing - Fixes: TypeError: unhashable type: 'StringArray' Added comprehensive tests that verify both fixes work correctly. Co-Authored-By: Claude Opus 4.5 --- .../vectorial_parameter_node_at_instant.py | 5 +- policyengine_core/populations/population.py | 6 + tests/core/test_pandas3_compatibility.py | 164 ++++++++++++++++++ 3 files changed, 174 insertions(+), 1 deletion(-) create mode 100644 tests/core/test_pandas3_compatibility.py diff --git a/policyengine_core/parameters/vectorial_parameter_node_at_instant.py b/policyengine_core/parameters/vectorial_parameter_node_at_instant.py index 9a7ce385..c5ce1367 100644 --- a/policyengine_core/parameters/vectorial_parameter_node_at_instant.py +++ b/policyengine_core/parameters/vectorial_parameter_node_at_instant.py @@ -196,7 +196,10 @@ def __getitem__(self, key: str) -> Any: if isinstance(key, str): return self.__getattr__(key) # If the key is a vector, e.g. ['zone_1', 'zone_2', 'zone_1'] - elif isinstance(key, numpy.ndarray): + # Convert pandas arrays (e.g., StringArray from pandas 3) to numpy + if hasattr(key, "__array__") and not isinstance(key, numpy.ndarray): + key = numpy.asarray(key) + if isinstance(key, numpy.ndarray): if not numpy.issubdtype(key.dtype, numpy.str_): # In case the key is not a string vector, stringify it if key.dtype == object and issubclass(type(key[0]), Enum): diff --git a/policyengine_core/populations/population.py b/policyengine_core/populations/population.py index a3c9f5aa..c2d9d0f4 100644 --- a/policyengine_core/populations/population.py +++ b/policyengine_core/populations/population.py @@ -41,6 +41,12 @@ def empty_array(self) -> numpy.ndarray: return numpy.zeros(self.count) def filled_array(self, value: Any, dtype: Any = None) -> numpy.ndarray: + import pandas as pd + + # Handle pandas extension dtypes (e.g., StringDtype in pandas 3) + # numpy.full() cannot handle these, so convert to object dtype + if isinstance(dtype, pd.api.extensions.ExtensionDtype): + dtype = object return numpy.full(self.count, value, dtype) def __getattr__(self, attribute: str) -> Any: diff --git a/tests/core/test_pandas3_compatibility.py b/tests/core/test_pandas3_compatibility.py new file mode 100644 index 00000000..234a2573 --- /dev/null +++ b/tests/core/test_pandas3_compatibility.py @@ -0,0 +1,164 @@ +""" +Tests for pandas 3.0.0 compatibility. + +These tests verify that policyengine-core works correctly with pandas 3.0.0, +which introduces: +1. PyArrow-backed strings as default (StringDtype) +2. Copy-on-Write by default +""" + +import numpy as np +import pandas as pd +import pytest + + +class TestFilledArrayWithStringDtype: + """Test that filled_array works with pandas StringDtype.""" + + def test_filled_array_with_string_dtype(self): + """ + In pandas 3.0.0, string columns use StringDtype by default. + numpy.full() cannot handle StringDtype, so we need to handle this case. + """ + from policyengine_core.populations.population import Population + from policyengine_core.entities import Entity + + # Create a minimal entity for testing + entity = Entity(key="person", plural="people", label="Person", doc="Test person entity") + + # Create a population with some count + population = Population(entity) + population.count = 5 + + # Test with regular numpy dtype - should work + result = population.filled_array("test_value", dtype=object) + assert len(result) == 5 + assert all(v == "test_value" for v in result) + + # Test with pandas StringDtype - this is what pandas 3 uses by default + # This should NOT raise an error + string_dtype = pd.StringDtype() + result = population.filled_array("test_value", dtype=string_dtype) + assert len(result) == 5 + assert all(v == "test_value" for v in result) + + def test_filled_array_with_pyarrow_string_dtype(self): + """ + Test with PyArrow-backed string dtype, which pandas 3 uses by default. + """ + pa = pytest.importorskip("pyarrow") + + from policyengine_core.populations.population import Population + from policyengine_core.entities import Entity + + entity = Entity(key="person", plural="people", label="Person", doc="Test person entity") + population = Population(entity) + population.count = 5 + + # PyArrow string dtype (proper way to create it) + arrow_string_dtype = pd.ArrowDtype(pa.string()) + result = population.filled_array("test_value", dtype=arrow_string_dtype) + assert len(result) == 5 + + +class TestParameterLookupWithStringArray: + """Test that parameter lookup works with pandas StringArray.""" + + def test_parameter_node_getitem_with_string_array(self): + """ + In pandas 3.0.0, series.values.astype(str) returns a StringArray + instead of a numpy array. ParameterNodeAtInstant.__getitem__ should + handle this. + """ + # Create a pandas StringArray (what pandas 3 returns) + string_array = pd.array(["value1", "value2", "value3"], dtype="string") + + # Verify it's a StringArray (not numpy array) + assert not isinstance(string_array, np.ndarray) + assert hasattr(string_array, "__array__") + + # Convert to numpy - this is what the fix should do + numpy_array = np.asarray(string_array) + assert isinstance(numpy_array, np.ndarray) + + def test_vectorial_parameter_node_with_string_array(self): + """ + VectorialParameterNodeAtInstant.__getitem__ should handle pandas + StringArray by converting it to numpy array. + """ + from policyengine_core.parameters.vectorial_parameter_node_at_instant import ( + VectorialParameterNodeAtInstant, + ) + + # Create a simple vectorial node for testing with proper structure + vector = np.array( + [(1.0, 2.0)], + dtype=[("zone_1", "float"), ("zone_2", "float")], + ).view(np.recarray) + + node = VectorialParameterNodeAtInstant("test", vector, "2024-01-01") + + # Test with numpy array - should work + key_numpy = np.array(["zone_1", "zone_2"]) + result_numpy = node[key_numpy] + assert len(result_numpy) == 2 + + # Test with pandas StringArray - this is what pandas 3 returns + key_string_array = pd.array(["zone_1", "zone_2"], dtype="string") + + # This should NOT raise TypeError: unhashable type: 'StringArray' + # The node should accept StringArray by converting to numpy + result_string_array = node[key_string_array] + assert len(result_string_array) == 2 + + # Results should be the same + np.testing.assert_array_equal(result_numpy, result_string_array) + + +class TestMicroSeriesCompatibility: + """Test that MicroSeries operations work with pandas 3.""" + + def test_series_subclass_preserved(self): + """ + Pandas 3.0.0 may change how Series subclasses are handled. + Operations should return the subclass, not plain Series. + """ + # This test documents expected behavior that may break in pandas 3 + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + + # Test that operations preserve Series type + result = df["a"] + df["b"] + assert isinstance(result, pd.Series) + + # With pandas 3, some operations may return different types + result = df["a"].astype(str) + # In pandas 3, this might return StringArray-backed Series + assert isinstance(result, pd.Series) + + +class TestStringDtypeConversion: + """Test utilities for converting pandas StringDtype to numpy-compatible types.""" + + def test_convert_string_dtype_to_object(self): + """ + When pandas StringDtype is passed to numpy functions, + we should convert it to object dtype. + """ + string_dtype = pd.StringDtype() + + # numpy.full doesn't understand StringDtype + with pytest.raises(TypeError): + np.full(5, "test", dtype=string_dtype) + + # But it works with object dtype + result = np.full(5, "test", dtype=object) + assert len(result) == 5 + + def test_is_pandas_extension_dtype(self): + """Test detection of pandas extension dtypes.""" + # pandas StringDtype is an ExtensionDtype + assert isinstance(pd.StringDtype(), pd.api.extensions.ExtensionDtype) + + # numpy dtypes are not + assert not isinstance(np.dtype("float64"), pd.api.extensions.ExtensionDtype) + assert not isinstance(np.dtype("object"), pd.api.extensions.ExtensionDtype) From f53c0d7be92371a3624cba7060cb15f3f04eee03 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sat, 24 Jan 2026 10:20:26 -0500 Subject: [PATCH 2/5] Add changelog entry --- changelog_entry.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29b..fdbcd7d9 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: patch + changes: + fixed: + - Fixed pandas 3.0 compatibility issues with StringDtype and StringArray From 9136ff7ff74ca0f3086b95c7bd2fdcc9834f7f3c Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sat, 24 Jan 2026 10:22:00 -0500 Subject: [PATCH 3/5] Format with black --- .../vectorial_parameter_node_at_instant.py | 56 ++++++------------- policyengine_core/populations/population.py | 25 ++------- tests/core/test_pandas3_compatibility.py | 8 ++- 3 files changed, 29 insertions(+), 60 deletions(-) diff --git a/policyengine_core/parameters/vectorial_parameter_node_at_instant.py b/policyengine_core/parameters/vectorial_parameter_node_at_instant.py index c5ce1367..514b1714 100644 --- a/policyengine_core/parameters/vectorial_parameter_node_at_instant.py +++ b/policyengine_core/parameters/vectorial_parameter_node_at_instant.py @@ -31,9 +31,7 @@ def build_from_node( VectorialParameterNodeAtInstant.build_from_node( node[subnode_name] ).vector - if isinstance( - node[subnode_name], parameters.ParameterNodeAtInstant - ) + if isinstance(node[subnode_name], parameters.ParameterNodeAtInstant) else node[subnode_name] ) for subnode_name in subnodes_name @@ -46,15 +44,9 @@ def build_from_node( dtype=[ ( subnode_name, - ( - subnode.dtype - if isinstance(subnode, numpy.recarray) - else "float" - ), - ) - for (subnode_name, subnode) in zip( - subnodes_name, vectorial_subnodes + (subnode.dtype if isinstance(subnode, numpy.recarray) else "float"), ) + for (subnode_name, subnode) in zip(subnodes_name, vectorial_subnodes) ], ) @@ -68,12 +60,12 @@ def check_node_vectorisable(node: "ParameterNode") -> None: Check that a node can be casted to a vectorial node, in order to be able to use fancy indexing. """ MESSAGE_PART_1 = "Cannot use fancy indexing on parameter node '{}', as" - MESSAGE_PART_3 = "To use fancy indexing on parameter node, its children must be homogenous." + MESSAGE_PART_3 = ( + "To use fancy indexing on parameter node, its children must be homogenous." + ) MESSAGE_PART_4 = "See more at ." - def raise_key_inhomogeneity_error( - node_with_key, node_without_key, missing_key - ): + def raise_key_inhomogeneity_error(node_with_key, node_without_key, missing_key): message = " ".join( [ MESSAGE_PART_1, @@ -146,24 +138,16 @@ def check_nodes_homogeneous(named_nodes): first_node_keys = first_node._children.keys() node_keys = node._children.keys() if not first_node_keys == node_keys: - missing_keys = set(first_node_keys).difference( - node_keys - ) - if ( - missing_keys - ): # If the first_node has a key that node hasn't + missing_keys = set(first_node_keys).difference(node_keys) + if missing_keys: # If the first_node has a key that node hasn't raise_key_inhomogeneity_error( first_name, name, missing_keys.pop() ) else: # If If the node has a key that first_node doesn't have missing_key = ( - set(node_keys) - .difference(first_node_keys) - .pop() - ) - raise_key_inhomogeneity_error( - name, first_name, missing_key + set(node_keys).difference(first_node_keys).pop() ) + raise_key_inhomogeneity_error(name, first_name, missing_key) children.update(extract_named_children(node)) check_nodes_homogeneous(children) elif isinstance(first_node, float) or isinstance(first_node, int): @@ -232,9 +216,7 @@ def __getitem__(self, key: str) -> Any: and values[0].dtype.names ): # Check if all values have the same dtype - dtypes_match = all( - val.dtype == values[0].dtype for val in values - ) + dtypes_match = all(val.dtype == values[0].dtype for val in values) if not dtypes_match: # Find the union of all field names across all values, preserving first seen order @@ -247,9 +229,7 @@ def __getitem__(self, key: str) -> Any: seen.add(field) # Create unified dtype with all fields - unified_dtype = numpy.dtype( - [(f, " Any: casted[field] = val[field] values_cast.append(casted) - default = numpy.zeros( - len(values_cast[0]), dtype=unified_dtype - ) + default = numpy.zeros(len(values_cast[0]), dtype=unified_dtype) # Fill with NaN for field in unified_dtype.names: default[field] = numpy.nan @@ -289,9 +267,9 @@ def __getitem__(self, key: str) -> Any: ) # If the result is not a leaf, wrap the result in a vectorial node. - if numpy.issubdtype( - result.dtype, numpy.record - ) or numpy.issubdtype(result.dtype, numpy.void): + if numpy.issubdtype(result.dtype, numpy.record) or numpy.issubdtype( + result.dtype, numpy.void + ): return VectorialParameterNodeAtInstant( self._name, result.view(numpy.recarray), self._instant_str ) diff --git a/policyengine_core/populations/population.py b/policyengine_core/populations/population.py index c2d9d0f4..fcb7f1b2 100644 --- a/policyengine_core/populations/population.py +++ b/policyengine_core/populations/population.py @@ -72,9 +72,7 @@ def check_array_compatible_with_entity(self, array: numpy.ndarray) -> None: ) ) - def check_period_validity( - self, variable_name: str, period: Period - ) -> None: + def check_period_validity(self, variable_name: str, period: Period) -> None: if period is None: stack = traceback.extract_stack() filename, line_number, function_name, line_of_code = stack[-3] @@ -143,9 +141,7 @@ def __call__( variable_name, period, **calculate_kwargs ) else: - return self.simulation.calculate( - variable_name, period, **calculate_kwargs - ) + return self.simulation.calculate(variable_name, period, **calculate_kwargs) # Helpers @@ -170,9 +166,7 @@ def get_memory_usage(self, variables: List[str] = None): for holder_memory_usage in holders_memory_usage.values() ) - return dict( - total_nb_bytes=total_memory_usage, by_variable=holders_memory_usage - ) + return dict(total_nb_bytes=total_memory_usage, by_variable=holders_memory_usage) @projectors.projectable def has_role(self, role: Role) -> ArrayLike: @@ -188,10 +182,7 @@ def has_role(self, role: Role) -> ArrayLike: group_population = self.simulation.get_population(role.entity.plural) if role.subroles: return numpy.logical_or.reduce( - [ - group_population.members_role == subrole - for subrole in role.subroles - ] + [group_population.members_role == subrole for subrole in role.subroles] ) else: return group_population.members_role == role @@ -239,9 +230,7 @@ def get_rank( # If entity is for instance 'person.household', we get the reference entity 'household' behind the projector entity = ( - entity - if not isinstance(entity, Projector) - else entity.reference_entity + entity if not isinstance(entity, Projector) else entity.reference_entity ) positions = entity.members_position @@ -252,9 +241,7 @@ def get_rank( # Matrix: the value in line i and column j is the value of criteria for the jth person of the ith entity matrix = numpy.asarray( [ - entity.value_nth_person( - k, filtered_criteria, default=numpy.inf - ) + entity.value_nth_person(k, filtered_criteria, default=numpy.inf) for k in range(biggest_entity_size) ] ).transpose() diff --git a/tests/core/test_pandas3_compatibility.py b/tests/core/test_pandas3_compatibility.py index 234a2573..d2b53db2 100644 --- a/tests/core/test_pandas3_compatibility.py +++ b/tests/core/test_pandas3_compatibility.py @@ -24,7 +24,9 @@ def test_filled_array_with_string_dtype(self): from policyengine_core.entities import Entity # Create a minimal entity for testing - entity = Entity(key="person", plural="people", label="Person", doc="Test person entity") + entity = Entity( + key="person", plural="people", label="Person", doc="Test person entity" + ) # Create a population with some count population = Population(entity) @@ -51,7 +53,9 @@ def test_filled_array_with_pyarrow_string_dtype(self): from policyengine_core.populations.population import Population from policyengine_core.entities import Entity - entity = Entity(key="person", plural="people", label="Person", doc="Test person entity") + entity = Entity( + key="person", plural="people", label="Person", doc="Test person entity" + ) population = Population(entity) population.count = 5 From af37b0c78f965a1e3347a63765bf974021052c19 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sat, 24 Jan 2026 10:24:03 -0500 Subject: [PATCH 4/5] Format with black -l 79 --- .../vectorial_parameter_node_at_instant.py | 56 +++++++++++++------ policyengine_core/populations/population.py | 25 +++++++-- tests/core/test_pandas3_compatibility.py | 22 ++++++-- 3 files changed, 75 insertions(+), 28 deletions(-) diff --git a/policyengine_core/parameters/vectorial_parameter_node_at_instant.py b/policyengine_core/parameters/vectorial_parameter_node_at_instant.py index 514b1714..c5ce1367 100644 --- a/policyengine_core/parameters/vectorial_parameter_node_at_instant.py +++ b/policyengine_core/parameters/vectorial_parameter_node_at_instant.py @@ -31,7 +31,9 @@ def build_from_node( VectorialParameterNodeAtInstant.build_from_node( node[subnode_name] ).vector - if isinstance(node[subnode_name], parameters.ParameterNodeAtInstant) + if isinstance( + node[subnode_name], parameters.ParameterNodeAtInstant + ) else node[subnode_name] ) for subnode_name in subnodes_name @@ -44,9 +46,15 @@ def build_from_node( dtype=[ ( subnode_name, - (subnode.dtype if isinstance(subnode, numpy.recarray) else "float"), + ( + subnode.dtype + if isinstance(subnode, numpy.recarray) + else "float" + ), + ) + for (subnode_name, subnode) in zip( + subnodes_name, vectorial_subnodes ) - for (subnode_name, subnode) in zip(subnodes_name, vectorial_subnodes) ], ) @@ -60,12 +68,12 @@ def check_node_vectorisable(node: "ParameterNode") -> None: Check that a node can be casted to a vectorial node, in order to be able to use fancy indexing. """ MESSAGE_PART_1 = "Cannot use fancy indexing on parameter node '{}', as" - MESSAGE_PART_3 = ( - "To use fancy indexing on parameter node, its children must be homogenous." - ) + MESSAGE_PART_3 = "To use fancy indexing on parameter node, its children must be homogenous." MESSAGE_PART_4 = "See more at ." - def raise_key_inhomogeneity_error(node_with_key, node_without_key, missing_key): + def raise_key_inhomogeneity_error( + node_with_key, node_without_key, missing_key + ): message = " ".join( [ MESSAGE_PART_1, @@ -138,16 +146,24 @@ def check_nodes_homogeneous(named_nodes): first_node_keys = first_node._children.keys() node_keys = node._children.keys() if not first_node_keys == node_keys: - missing_keys = set(first_node_keys).difference(node_keys) - if missing_keys: # If the first_node has a key that node hasn't + missing_keys = set(first_node_keys).difference( + node_keys + ) + if ( + missing_keys + ): # If the first_node has a key that node hasn't raise_key_inhomogeneity_error( first_name, name, missing_keys.pop() ) else: # If If the node has a key that first_node doesn't have missing_key = ( - set(node_keys).difference(first_node_keys).pop() + set(node_keys) + .difference(first_node_keys) + .pop() + ) + raise_key_inhomogeneity_error( + name, first_name, missing_key ) - raise_key_inhomogeneity_error(name, first_name, missing_key) children.update(extract_named_children(node)) check_nodes_homogeneous(children) elif isinstance(first_node, float) or isinstance(first_node, int): @@ -216,7 +232,9 @@ def __getitem__(self, key: str) -> Any: and values[0].dtype.names ): # Check if all values have the same dtype - dtypes_match = all(val.dtype == values[0].dtype for val in values) + dtypes_match = all( + val.dtype == values[0].dtype for val in values + ) if not dtypes_match: # Find the union of all field names across all values, preserving first seen order @@ -229,7 +247,9 @@ def __getitem__(self, key: str) -> Any: seen.add(field) # Create unified dtype with all fields - unified_dtype = numpy.dtype([(f, " Any: casted[field] = val[field] values_cast.append(casted) - default = numpy.zeros(len(values_cast[0]), dtype=unified_dtype) + default = numpy.zeros( + len(values_cast[0]), dtype=unified_dtype + ) # Fill with NaN for field in unified_dtype.names: default[field] = numpy.nan @@ -267,9 +289,9 @@ def __getitem__(self, key: str) -> Any: ) # If the result is not a leaf, wrap the result in a vectorial node. - if numpy.issubdtype(result.dtype, numpy.record) or numpy.issubdtype( - result.dtype, numpy.void - ): + if numpy.issubdtype( + result.dtype, numpy.record + ) or numpy.issubdtype(result.dtype, numpy.void): return VectorialParameterNodeAtInstant( self._name, result.view(numpy.recarray), self._instant_str ) diff --git a/policyengine_core/populations/population.py b/policyengine_core/populations/population.py index fcb7f1b2..c2d9d0f4 100644 --- a/policyengine_core/populations/population.py +++ b/policyengine_core/populations/population.py @@ -72,7 +72,9 @@ def check_array_compatible_with_entity(self, array: numpy.ndarray) -> None: ) ) - def check_period_validity(self, variable_name: str, period: Period) -> None: + def check_period_validity( + self, variable_name: str, period: Period + ) -> None: if period is None: stack = traceback.extract_stack() filename, line_number, function_name, line_of_code = stack[-3] @@ -141,7 +143,9 @@ def __call__( variable_name, period, **calculate_kwargs ) else: - return self.simulation.calculate(variable_name, period, **calculate_kwargs) + return self.simulation.calculate( + variable_name, period, **calculate_kwargs + ) # Helpers @@ -166,7 +170,9 @@ def get_memory_usage(self, variables: List[str] = None): for holder_memory_usage in holders_memory_usage.values() ) - return dict(total_nb_bytes=total_memory_usage, by_variable=holders_memory_usage) + return dict( + total_nb_bytes=total_memory_usage, by_variable=holders_memory_usage + ) @projectors.projectable def has_role(self, role: Role) -> ArrayLike: @@ -182,7 +188,10 @@ def has_role(self, role: Role) -> ArrayLike: group_population = self.simulation.get_population(role.entity.plural) if role.subroles: return numpy.logical_or.reduce( - [group_population.members_role == subrole for subrole in role.subroles] + [ + group_population.members_role == subrole + for subrole in role.subroles + ] ) else: return group_population.members_role == role @@ -230,7 +239,9 @@ def get_rank( # If entity is for instance 'person.household', we get the reference entity 'household' behind the projector entity = ( - entity if not isinstance(entity, Projector) else entity.reference_entity + entity + if not isinstance(entity, Projector) + else entity.reference_entity ) positions = entity.members_position @@ -241,7 +252,9 @@ def get_rank( # Matrix: the value in line i and column j is the value of criteria for the jth person of the ith entity matrix = numpy.asarray( [ - entity.value_nth_person(k, filtered_criteria, default=numpy.inf) + entity.value_nth_person( + k, filtered_criteria, default=numpy.inf + ) for k in range(biggest_entity_size) ] ).transpose() diff --git a/tests/core/test_pandas3_compatibility.py b/tests/core/test_pandas3_compatibility.py index d2b53db2..1cce2978 100644 --- a/tests/core/test_pandas3_compatibility.py +++ b/tests/core/test_pandas3_compatibility.py @@ -25,7 +25,10 @@ def test_filled_array_with_string_dtype(self): # Create a minimal entity for testing entity = Entity( - key="person", plural="people", label="Person", doc="Test person entity" + key="person", + plural="people", + label="Person", + doc="Test person entity", ) # Create a population with some count @@ -54,14 +57,19 @@ def test_filled_array_with_pyarrow_string_dtype(self): from policyengine_core.entities import Entity entity = Entity( - key="person", plural="people", label="Person", doc="Test person entity" + key="person", + plural="people", + label="Person", + doc="Test person entity", ) population = Population(entity) population.count = 5 # PyArrow string dtype (proper way to create it) arrow_string_dtype = pd.ArrowDtype(pa.string()) - result = population.filled_array("test_value", dtype=arrow_string_dtype) + result = population.filled_array( + "test_value", dtype=arrow_string_dtype + ) assert len(result) == 5 @@ -164,5 +172,9 @@ def test_is_pandas_extension_dtype(self): assert isinstance(pd.StringDtype(), pd.api.extensions.ExtensionDtype) # numpy dtypes are not - assert not isinstance(np.dtype("float64"), pd.api.extensions.ExtensionDtype) - assert not isinstance(np.dtype("object"), pd.api.extensions.ExtensionDtype) + assert not isinstance( + np.dtype("float64"), pd.api.extensions.ExtensionDtype + ) + assert not isinstance( + np.dtype("object"), pd.api.extensions.ExtensionDtype + ) From 3552fd64ab8062ce514ce445b026933d2babb136 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sat, 24 Jan 2026 10:30:06 -0500 Subject: [PATCH 5/5] Format with black 26.1.0 to match CI --- policyengine_core/charts/formatting.py | 1 - policyengine_core/populations/population.py | 8 ++------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/policyengine_core/charts/formatting.py b/policyengine_core/charts/formatting.py index 8fcf536a..182774c0 100644 --- a/policyengine_core/charts/formatting.py +++ b/policyengine_core/charts/formatting.py @@ -1,7 +1,6 @@ import plotly.graph_objects as go from IPython.display import HTML - GREEN = "#29d40f" LIGHT_GREEN = "#C5E1A5" DARK_GREEN = "#558B2F" diff --git a/policyengine_core/populations/population.py b/policyengine_core/populations/population.py index c2d9d0f4..988485ec 100644 --- a/policyengine_core/populations/population.py +++ b/policyengine_core/populations/population.py @@ -78,17 +78,13 @@ def check_period_validity( if period is None: stack = traceback.extract_stack() filename, line_number, function_name, line_of_code = stack[-3] - raise ValueError( - """ + raise ValueError(""" You requested computation of variable "{}", but you did not specify on which period in "{}:{}": {} When you request the computation of a variable within a formula, you must always specify the period as the second parameter. The convention is to call this parameter "period". For example: computed_salary = person('salary', period). See more information at . -""".format( - variable_name, filename, line_number, line_of_code - ) - ) +""".format(variable_name, filename, line_number, line_of_code)) def __call__( self,