From bdd0e8bb82c586fd110087d05a805efab8e4fc71 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Sat, 24 Jan 2026 10:18:14 -0500
Subject: [PATCH 1/5] Add pandas 3.0 compatibility fixes

Fixes two issues that occur with pandas 3.0's new defaults:

1. Population.filled_array() now handles pandas ExtensionDtype (e.g. StringDtype)
   - numpy.full() cannot interpret pandas extension dtypes, so any
     ExtensionDtype is replaced with object dtype before the call
   - Fixes: TypeError: Cannot interpret '<StringDtype>' as a data type

2. VectorialParameterNodeAtInstant.__getitem__ now handles StringArray
   - pandas 3 returns a StringArray instead of a numpy array for string operations
   - Any non-ndarray key that exposes __array__ is converted with
     numpy.asarray() before processing
   - Fixes: TypeError: unhashable type: 'StringArray'

Added tests for both fixes in tests/core/test_pandas3_compatibility.py.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 .../vectorial_parameter_node_at_instant.py    |   5 +-
 policyengine_core/populations/population.py   |   6 +
 tests/core/test_pandas3_compatibility.py      | 164 ++++++++++++++++++
 3 files changed, 174 insertions(+), 1 deletion(-)
 create mode 100644 tests/core/test_pandas3_compatibility.py
diff --git a/policyengine_core/parameters/vectorial_parameter_node_at_instant.py b/policyengine_core/parameters/vectorial_parameter_node_at_instant.py
index 9a7ce385..c5ce1367 100644
--- a/policyengine_core/parameters/vectorial_parameter_node_at_instant.py
+++ b/policyengine_core/parameters/vectorial_parameter_node_at_instant.py
@@ -196,7 +196,10 @@ def __getitem__(self, key: str) -> Any:
         if isinstance(key, str):
             return self.__getattr__(key)
         # If the key is a vector, e.g. ['zone_1', 'zone_2', 'zone_1']
-        elif isinstance(key, numpy.ndarray):
+        # Convert pandas arrays (e.g., StringArray from pandas 3) to numpy
+        if hasattr(key, "__array__") and not isinstance(key, numpy.ndarray):
+            key = numpy.asarray(key)
+        if isinstance(key, numpy.ndarray):
             if not numpy.issubdtype(key.dtype, numpy.str_):
                 # In case the key is not a string vector, stringify it
                 if key.dtype == object and issubclass(type(key[0]), Enum):
diff --git a/policyengine_core/populations/population.py b/policyengine_core/populations/population.py
index a3c9f5aa..c2d9d0f4 100644
--- a/policyengine_core/populations/population.py
+++ b/policyengine_core/populations/population.py
@@ -41,6 +41,12 @@ def empty_array(self) -> numpy.ndarray:
         return numpy.zeros(self.count)
 
     def filled_array(self, value: Any, dtype: Any = None) -> numpy.ndarray:
+        import pandas as pd
+
+        # Handle pandas extension dtypes (e.g., StringDtype in pandas 3)
+        # numpy.full() cannot handle these, so convert to object dtype
+        if isinstance(dtype, pd.api.extensions.ExtensionDtype):
+            dtype = object
         return numpy.full(self.count, value, dtype)
 
     def __getattr__(self, attribute: str) -> Any:
diff --git a/tests/core/test_pandas3_compatibility.py b/tests/core/test_pandas3_compatibility.py
new file mode 100644
index 00000000..234a2573
--- /dev/null
+++ b/tests/core/test_pandas3_compatibility.py
@@ -0,0 +1,164 @@
+"""
+Tests for pandas 3.0.0 compatibility.
+
+These tests verify that policyengine-core works correctly with pandas 3.0.0,
+which introduces:
+1. PyArrow-backed strings as default (StringDtype)
+2. Copy-on-Write by default
+"""
+
+import numpy as np
+import pandas as pd
+import pytest
+
+
+class TestFilledArrayWithStringDtype:
+    """Test that filled_array works with pandas StringDtype."""
+
+    def test_filled_array_with_string_dtype(self):
+        """
+        In pandas 3.0.0, string columns use StringDtype by default.
+        numpy.full() cannot handle StringDtype, so we need to handle this case.
+        """
+        from policyengine_core.populations.population import Population
+        from policyengine_core.entities import Entity
+
+        # Create a minimal entity for testing
+        entity = Entity(key="person", plural="people", label="Person", doc="Test person entity")
+
+        # Create a population with some count
+        population = Population(entity)
+        population.count = 5
+
+        # Test with regular numpy dtype - should work
+        result = population.filled_array("test_value", dtype=object)
+        assert len(result) == 5
+        assert all(v == "test_value" for v in result)
+
+        # Test with pandas StringDtype - this is what pandas 3 uses by default
+        # This should NOT raise an error
+        string_dtype = pd.StringDtype()
+        result = population.filled_array("test_value", dtype=string_dtype)
+        assert len(result) == 5
+        assert all(v == "test_value" for v in result)
+
+    def test_filled_array_with_pyarrow_string_dtype(self):
+        """
+        Test with PyArrow-backed string dtype, which pandas 3 uses by default.
+        """
+        pa = pytest.importorskip("pyarrow")
+
+        from policyengine_core.populations.population import Population
+        from policyengine_core.entities import Entity
+
+        entity = Entity(key="person", plural="people", label="Person", doc="Test person entity")
+        population = Population(entity)
+        population.count = 5
+
+        # PyArrow string dtype (proper way to create it)
+        arrow_string_dtype = pd.ArrowDtype(pa.string())
+        result = population.filled_array("test_value", dtype=arrow_string_dtype)
+        assert len(result) == 5
+
+
+class TestParameterLookupWithStringArray:
+    """Test that parameter lookup works with pandas StringArray."""
+
+    def test_parameter_node_getitem_with_string_array(self):
+        """
+        In pandas 3.0.0, series.values.astype(str) returns a StringArray
+        instead of a numpy array. ParameterNodeAtInstant.__getitem__ should
+        handle this.
+        """
+        # Create a pandas StringArray (what pandas 3 returns)
+        string_array = pd.array(["value1", "value2", "value3"], dtype="string")
+
+        # Verify it's a StringArray (not numpy array)
+        assert not isinstance(string_array, np.ndarray)
+        assert hasattr(string_array, "__array__")
+
+        # Convert to numpy - this is what the fix should do
+        numpy_array = np.asarray(string_array)
+        assert isinstance(numpy_array, np.ndarray)
+
+    def test_vectorial_parameter_node_with_string_array(self):
+        """
+        VectorialParameterNodeAtInstant.__getitem__ should handle pandas
+        StringArray by converting it to numpy array.
+        """
+        from policyengine_core.parameters.vectorial_parameter_node_at_instant import (
+            VectorialParameterNodeAtInstant,
+        )
+
+        # Create a simple vectorial node for testing with proper structure
+        vector = np.array(
+            [(1.0, 2.0)],
+            dtype=[("zone_1", "float"), ("zone_2", "float")],
+        ).view(np.recarray)
+
+        node = VectorialParameterNodeAtInstant("test", vector, "2024-01-01")
+
+        # Test with numpy array - should work
+        key_numpy = np.array(["zone_1", "zone_2"])
+        result_numpy = node[key_numpy]
+        assert len(result_numpy) == 2
+
+        # Test with pandas StringArray - this is what pandas 3 returns
+        key_string_array = pd.array(["zone_1", "zone_2"], dtype="string")
+
+        # This should NOT raise TypeError: unhashable type: 'StringArray'
+        # The node should accept StringArray by converting to numpy
+        result_string_array = node[key_string_array]
+        assert len(result_string_array) == 2
+
+        # Results should be the same
+        np.testing.assert_array_equal(result_numpy, result_string_array)
+
+
+class TestMicroSeriesCompatibility:
+    """Test that MicroSeries operations work with pandas 3."""
+
+    def test_series_subclass_preserved(self):
+        """
+        Pandas 3.0.0 may change how Series subclasses are handled.
+        Operations should return the subclass, not plain Series.
+        """
+        # This test documents expected behavior that may break in pandas 3
+        df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+
+        # Test that operations preserve Series type
+        result = df["a"] + df["b"]
+        assert isinstance(result, pd.Series)
+
+        # With pandas 3, some operations may return different types
+        result = df["a"].astype(str)
+        # In pandas 3, this might return StringArray-backed Series
+        assert isinstance(result, pd.Series)
+
+
+class TestStringDtypeConversion:
+    """Test utilities for converting pandas StringDtype to numpy-compatible types."""
+
+    def test_convert_string_dtype_to_object(self):
+        """
+        When pandas StringDtype is passed to numpy functions,
+        we should convert it to object dtype.
+        """
+        string_dtype = pd.StringDtype()
+
+        # numpy.full doesn't understand StringDtype
+        with pytest.raises(TypeError):
+            np.full(5, "test", dtype=string_dtype)
+
+        # But it works with object dtype
+        result = np.full(5, "test", dtype=object)
+        assert len(result) == 5
+
+    def test_is_pandas_extension_dtype(self):
+        """Test detection of pandas extension dtypes."""
+        # pandas StringDtype is an ExtensionDtype
+        assert isinstance(pd.StringDtype(), pd.api.extensions.ExtensionDtype)
+
+        # numpy dtypes are not
+        assert not isinstance(np.dtype("float64"), pd.api.extensions.ExtensionDtype)
+        assert not isinstance(np.dtype("object"), pd.api.extensions.ExtensionDtype)

From f53c0d7be92371a3624cba7060cb15f3f04eee03 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Sat, 24 Jan 2026 10:20:26 -0500
Subject: [PATCH 2/5] Add changelog entry

---
 changelog_entry.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index e69de29b..fdbcd7d9 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -0,0 +1,4 @@
+- bump: patch
+  changes:
+    fixed:
+      - Fixed pandas 3.0 compatibility issues with StringDtype and StringArray

From 9136ff7ff74ca0f3086b95c7bd2fdcc9834f7f3c Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Sat, 24 Jan 2026 10:22:00 -0500
Subject: [PATCH 3/5] Format with black

---
 .../vectorial_parameter_node_at_instant.py    | 56 ++++++-------------
 policyengine_core/populations/population.py   | 25 ++-------
 tests/core/test_pandas3_compatibility.py      |  8 ++-
 3 files changed, 29 insertions(+), 60 deletions(-)

diff --git a/policyengine_core/parameters/vectorial_parameter_node_at_instant.py b/policyengine_core/parameters/vectorial_parameter_node_at_instant.py
index c5ce1367..514b1714 100644
--- a/policyengine_core/parameters/vectorial_parameter_node_at_instant.py
+++ b/policyengine_core/parameters/vectorial_parameter_node_at_instant.py
@@ -31,9 +31,7 @@ def build_from_node(
                     VectorialParameterNodeAtInstant.build_from_node(
                         node[subnode_name]
                     ).vector
-                    if isinstance(
-                        node[subnode_name], parameters.ParameterNodeAtInstant
-                    )
+                    if isinstance(node[subnode_name], parameters.ParameterNodeAtInstant)
                     else node[subnode_name]
                 )
                 for subnode_name in subnodes_name
@@ -46,15 +44,9 @@ def build_from_node(
             dtype=[
                 (
                     subnode_name,
-                    (
-                        subnode.dtype
-                        if isinstance(subnode, numpy.recarray)
-                        else "float"
-                    ),
-                )
-                for (subnode_name, subnode) in zip(
-                    subnodes_name, vectorial_subnodes
+                    (subnode.dtype if isinstance(subnode, numpy.recarray) else "float"),
                 )
+                for (subnode_name, subnode) in zip(subnodes_name, vectorial_subnodes)
             ],
         )
 
@@ -68,12 +60,12 @@ def check_node_vectorisable(node: "ParameterNode") -> None:
         Check that a node can be casted to a vectorial node, in order to be able to use fancy indexing.
         """
         MESSAGE_PART_1 = "Cannot use fancy indexing on parameter node '{}', as"
-        MESSAGE_PART_3 = "To use fancy indexing on parameter node, its children must be homogenous."
+        MESSAGE_PART_3 = (
+            "To use fancy indexing on parameter node, its children must be homogenous."
+        )
         MESSAGE_PART_4 = "See more at <https://openfisca.org/doc/coding-the-legislation/legislation_parameters#computing-a-parameter-that-depends-on-a-variable-fancy-indexing>."
 
-        def raise_key_inhomogeneity_error(
-            node_with_key, node_without_key, missing_key
-        ):
+        def raise_key_inhomogeneity_error(node_with_key, node_without_key, missing_key):
             message = " ".join(
                 [
                     MESSAGE_PART_1,
@@ -146,24 +138,16 @@ def check_nodes_homogeneous(named_nodes):
                     first_node_keys = first_node._children.keys()
                     node_keys = node._children.keys()
                     if not first_node_keys == node_keys:
-                        missing_keys = set(first_node_keys).difference(
-                            node_keys
-                        )
-                        if (
-                            missing_keys
-                        ):  # If the first_node has a key that node hasn't
+                        missing_keys = set(first_node_keys).difference(node_keys)
+                        if missing_keys:  # If the first_node has a key that node hasn't
                             raise_key_inhomogeneity_error(
                                 first_name, name, missing_keys.pop()
                             )
                         else:  # If If the node has a key that first_node doesn't have
                             missing_key = (
-                                set(node_keys)
-                                .difference(first_node_keys)
-                                .pop()
-                            )
-                            raise_key_inhomogeneity_error(
-                                name, first_name, missing_key
+                                set(node_keys).difference(first_node_keys).pop()
                             )
+                            raise_key_inhomogeneity_error(name, first_name, missing_key)
                     children.update(extract_named_children(node))
                 check_nodes_homogeneous(children)
             elif isinstance(first_node, float) or isinstance(first_node, int):
@@ -232,9 +216,7 @@ def __getitem__(self, key: str) -> Any:
                 and values[0].dtype.names
             ):
                 # Check if all values have the same dtype
-                dtypes_match = all(
-                    val.dtype == values[0].dtype for val in values
-                )
+                dtypes_match = all(val.dtype == values[0].dtype for val in values)
 
                 if not dtypes_match:
                     # Find the union of all field names across all values, preserving first seen order
@@ -247,9 +229,7 @@ def __getitem__(self, key: str) -> Any:
                                 seen.add(field)
 
                     # Create unified dtype with all fields
-                    unified_dtype = numpy.dtype(
-                        [(f, "<f8") for f in all_fields]
-                    )
+                    unified_dtype = numpy.dtype([(f, "<f8") for f in all_fields])
 
                     # Cast all values to unified dtype
                     values_cast = []
@@ -259,9 +239,7 @@ def __getitem__(self, key: str) -> Any:
                             casted[field] = val[field]
                         values_cast.append(casted)
 
-                    default = numpy.zeros(
-                        len(values_cast[0]), dtype=unified_dtype
-                    )
+                    default = numpy.zeros(len(values_cast[0]), dtype=unified_dtype)
                     # Fill with NaN
                     for field in unified_dtype.names:
                         default[field] = numpy.nan
@@ -289,9 +267,9 @@ def __getitem__(self, key: str) -> Any:
                     )
 
             # If the result is not a leaf, wrap the result in a vectorial node.
-            if numpy.issubdtype(
-                result.dtype, numpy.record
-            ) or numpy.issubdtype(result.dtype, numpy.void):
+            if numpy.issubdtype(result.dtype, numpy.record) or numpy.issubdtype(
+                result.dtype, numpy.void
+            ):
                 return VectorialParameterNodeAtInstant(
                     self._name, result.view(numpy.recarray), self._instant_str
                 )
diff --git a/policyengine_core/populations/population.py b/policyengine_core/populations/population.py
index c2d9d0f4..fcb7f1b2 100644
--- a/policyengine_core/populations/population.py
+++ b/policyengine_core/populations/population.py
@@ -72,9 +72,7 @@ def check_array_compatible_with_entity(self, array: numpy.ndarray) -> None:
                 )
             )
 
-    def check_period_validity(
-        self, variable_name: str, period: Period
-    ) -> None:
+    def check_period_validity(self, variable_name: str, period: Period) -> None:
         if period is None:
             stack = traceback.extract_stack()
             filename, line_number, function_name, line_of_code = stack[-3]
@@ -143,9 +141,7 @@ def __call__(
                 variable_name, period, **calculate_kwargs
             )
         else:
-            return self.simulation.calculate(
-                variable_name, period, **calculate_kwargs
-            )
+            return self.simulation.calculate(variable_name, period, **calculate_kwargs)
 
     # Helpers
 
@@ -170,9 +166,7 @@ def get_memory_usage(self, variables: List[str] = None):
             for holder_memory_usage in holders_memory_usage.values()
         )
 
-        return dict(
-            total_nb_bytes=total_memory_usage, by_variable=holders_memory_usage
-        )
+        return dict(total_nb_bytes=total_memory_usage, by_variable=holders_memory_usage)
 
     @projectors.projectable
     def has_role(self, role: Role) -> ArrayLike:
@@ -188,10 +182,7 @@ def has_role(self, role: Role) -> ArrayLike:
         group_population = self.simulation.get_population(role.entity.plural)
         if role.subroles:
             return numpy.logical_or.reduce(
-                [
-                    group_population.members_role == subrole
-                    for subrole in role.subroles
-                ]
+                [group_population.members_role == subrole for subrole in role.subroles]
             )
         else:
             return group_population.members_role == role
@@ -239,9 +230,7 @@ def get_rank(
 
         # If entity is for instance 'person.household', we get the reference entity 'household' behind the projector
         entity = (
-            entity
-            if not isinstance(entity, Projector)
-            else entity.reference_entity
+            entity if not isinstance(entity, Projector) else entity.reference_entity
         )
 
         positions = entity.members_position
@@ -252,9 +241,7 @@ def get_rank(
         # Matrix: the value in line i and column j is the value of criteria for the jth person of the ith entity
         matrix = numpy.asarray(
             [
-                entity.value_nth_person(
-                    k, filtered_criteria, default=numpy.inf
-                )
+                entity.value_nth_person(k, filtered_criteria, default=numpy.inf)
                 for k in range(biggest_entity_size)
             ]
         ).transpose()
diff --git a/tests/core/test_pandas3_compatibility.py b/tests/core/test_pandas3_compatibility.py
index 234a2573..d2b53db2 100644
--- a/tests/core/test_pandas3_compatibility.py
+++ b/tests/core/test_pandas3_compatibility.py
@@ -24,7 +24,9 @@ def test_filled_array_with_string_dtype(self):
         from policyengine_core.entities import Entity
 
         # Create a minimal entity for testing
-        entity = Entity(key="person", plural="people", label="Person", doc="Test person entity")
+        entity = Entity(
+            key="person", plural="people", label="Person", doc="Test person entity"
+        )
 
         # Create a population with some count
         population = Population(entity)
@@ -51,7 +53,9 @@ def test_filled_array_with_pyarrow_string_dtype(self):
         from policyengine_core.populations.population import Population
         from policyengine_core.entities import Entity
 
-        entity = Entity(key="person", plural="people", label="Person", doc="Test person entity")
+        entity = Entity(
+            key="person", plural="people", label="Person", doc="Test person entity"
+        )
         population = Population(entity)
         population.count = 5
 

From af37b0c78f965a1e3347a63765bf974021052c19 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Sat, 24 Jan 2026 10:24:03 -0500
Subject: [PATCH 4/5] Format with black -l 79

---
 .../vectorial_parameter_node_at_instant.py    | 56 +++++++++++++------
 policyengine_core/populations/population.py   | 25 +++++++--
 tests/core/test_pandas3_compatibility.py      | 22 ++++++--
 3 files changed, 75 insertions(+), 28 deletions(-)

diff --git a/policyengine_core/parameters/vectorial_parameter_node_at_instant.py b/policyengine_core/parameters/vectorial_parameter_node_at_instant.py
index 514b1714..c5ce1367 100644
--- a/policyengine_core/parameters/vectorial_parameter_node_at_instant.py
+++ b/policyengine_core/parameters/vectorial_parameter_node_at_instant.py
@@ -31,7 +31,9 @@ def build_from_node(
                     VectorialParameterNodeAtInstant.build_from_node(
                         node[subnode_name]
                     ).vector
-                    if isinstance(node[subnode_name], parameters.ParameterNodeAtInstant)
+                    if isinstance(
+                        node[subnode_name], parameters.ParameterNodeAtInstant
+                    )
                     else node[subnode_name]
                 )
                 for subnode_name in subnodes_name
@@ -44,9 +46,15 @@ def build_from_node(
             dtype=[
                 (
                     subnode_name,
-                    (subnode.dtype if isinstance(subnode, numpy.recarray) else "float"),
+                    (
+                        subnode.dtype
+                        if isinstance(subnode, numpy.recarray)
+                        else "float"
+                    ),
+                )
+                for (subnode_name, subnode) in zip(
+                    subnodes_name, vectorial_subnodes
                 )
-                for (subnode_name, subnode) in zip(subnodes_name, vectorial_subnodes)
             ],
         )
 
@@ -60,12 +68,12 @@ def check_node_vectorisable(node: "ParameterNode") -> None:
         Check that a node can be casted to a vectorial node, in order to be able to use fancy indexing.
         """
         MESSAGE_PART_1 = "Cannot use fancy indexing on parameter node '{}', as"
-        MESSAGE_PART_3 = (
-            "To use fancy indexing on parameter node, its children must be homogenous."
-        )
+        MESSAGE_PART_3 = "To use fancy indexing on parameter node, its children must be homogenous."
         MESSAGE_PART_4 = "See more at <https://openfisca.org/doc/coding-the-legislation/legislation_parameters#computing-a-parameter-that-depends-on-a-variable-fancy-indexing>."
 
-        def raise_key_inhomogeneity_error(node_with_key, node_without_key, missing_key):
+        def raise_key_inhomogeneity_error(
+            node_with_key, node_without_key, missing_key
+        ):
             message = " ".join(
                 [
                     MESSAGE_PART_1,
@@ -138,16 +146,24 @@ def check_nodes_homogeneous(named_nodes):
                     first_node_keys = first_node._children.keys()
                     node_keys = node._children.keys()
                     if not first_node_keys == node_keys:
-                        missing_keys = set(first_node_keys).difference(node_keys)
-                        if missing_keys:  # If the first_node has a key that node hasn't
+                        missing_keys = set(first_node_keys).difference(
+                            node_keys
+                        )
+                        if (
+                            missing_keys
+                        ):  # If the first_node has a key that node hasn't
                             raise_key_inhomogeneity_error(
                                 first_name, name, missing_keys.pop()
                             )
                         else:  # If If the node has a key that first_node doesn't have
                             missing_key = (
-                                set(node_keys).difference(first_node_keys).pop()
+                                set(node_keys)
+                                .difference(first_node_keys)
+                                .pop()
+                            )
+                            raise_key_inhomogeneity_error(
+                                name, first_name, missing_key
                             )
-                            raise_key_inhomogeneity_error(name, first_name, missing_key)
                     children.update(extract_named_children(node))
                 check_nodes_homogeneous(children)
             elif isinstance(first_node, float) or isinstance(first_node, int):
@@ -216,7 +232,9 @@ def __getitem__(self, key: str) -> Any:
                 and values[0].dtype.names
             ):
                 # Check if all values have the same dtype
-                dtypes_match = all(val.dtype == values[0].dtype for val in values)
+                dtypes_match = all(
+                    val.dtype == values[0].dtype for val in values
+                )
 
                 if not dtypes_match:
                     # Find the union of all field names across all values, preserving first seen order
@@ -229,7 +247,9 @@ def __getitem__(self, key: str) -> Any:
                                 seen.add(field)
 
                     # Create unified dtype with all fields
-                    unified_dtype = numpy.dtype([(f, "<f8") for f in all_fields])
+                    unified_dtype = numpy.dtype(
+                        [(f, "<f8") for f in all_fields]
+                    )
 
                     # Cast all values to unified dtype
                     values_cast = []
@@ -239,7 +259,9 @@ def __getitem__(self, key: str) -> Any:
                             casted[field] = val[field]
                         values_cast.append(casted)
 
-                    default = numpy.zeros(len(values_cast[0]), dtype=unified_dtype)
+                    default = numpy.zeros(
+                        len(values_cast[0]), dtype=unified_dtype
+                    )
                     # Fill with NaN
                     for field in unified_dtype.names:
                         default[field] = numpy.nan
@@ -267,9 +289,9 @@ def __getitem__(self, key: str) -> Any:
                     )
 
             # If the result is not a leaf, wrap the result in a vectorial node.
-            if numpy.issubdtype(result.dtype, numpy.record) or numpy.issubdtype(
-                result.dtype, numpy.void
-            ):
+            if numpy.issubdtype(
+                result.dtype, numpy.record
+            ) or numpy.issubdtype(result.dtype, numpy.void):
                 return VectorialParameterNodeAtInstant(
                     self._name, result.view(numpy.recarray), self._instant_str
                 )
diff --git a/policyengine_core/populations/population.py b/policyengine_core/populations/population.py
index fcb7f1b2..c2d9d0f4 100644
--- a/policyengine_core/populations/population.py
+++ b/policyengine_core/populations/population.py
@@ -72,7 +72,9 @@ def check_array_compatible_with_entity(self, array: numpy.ndarray) -> None:
                 )
             )
 
-    def check_period_validity(self, variable_name: str, period: Period) -> None:
+    def check_period_validity(
+        self, variable_name: str, period: Period
+    ) -> None:
         if period is None:
             stack = traceback.extract_stack()
             filename, line_number, function_name, line_of_code = stack[-3]
@@ -141,7 +143,9 @@ def __call__(
                 variable_name, period, **calculate_kwargs
             )
         else:
-            return self.simulation.calculate(variable_name, period, **calculate_kwargs)
+            return self.simulation.calculate(
+                variable_name, period, **calculate_kwargs
+            )
 
     # Helpers
 
@@ -166,7 +170,9 @@ def get_memory_usage(self, variables: List[str] = None):
             for holder_memory_usage in holders_memory_usage.values()
         )
 
-        return dict(total_nb_bytes=total_memory_usage, by_variable=holders_memory_usage)
+        return dict(
+            total_nb_bytes=total_memory_usage, by_variable=holders_memory_usage
+        )
 
     @projectors.projectable
     def has_role(self, role: Role) -> ArrayLike:
@@ -182,7 +188,10 @@ def has_role(self, role: Role) -> ArrayLike:
         group_population = self.simulation.get_population(role.entity.plural)
         if role.subroles:
             return numpy.logical_or.reduce(
-                [group_population.members_role == subrole for subrole in role.subroles]
+                [
+                    group_population.members_role == subrole
+                    for subrole in role.subroles
+                ]
             )
         else:
             return group_population.members_role == role
@@ -230,7 +239,9 @@ def get_rank(
 
         # If entity is for instance 'person.household', we get the reference entity 'household' behind the projector
         entity = (
-            entity if not isinstance(entity, Projector) else entity.reference_entity
+            entity
+            if not isinstance(entity, Projector)
+            else entity.reference_entity
         )
 
         positions = entity.members_position
@@ -241,7 +252,9 @@ def get_rank(
         # Matrix: the value in line i and column j is the value of criteria for the jth person of the ith entity
         matrix = numpy.asarray(
             [
-                entity.value_nth_person(k, filtered_criteria, default=numpy.inf)
+                entity.value_nth_person(
+                    k, filtered_criteria, default=numpy.inf
+                )
                 for k in range(biggest_entity_size)
             ]
         ).transpose()
diff --git a/tests/core/test_pandas3_compatibility.py b/tests/core/test_pandas3_compatibility.py
index d2b53db2..1cce2978 100644
--- a/tests/core/test_pandas3_compatibility.py
+++ b/tests/core/test_pandas3_compatibility.py
@@ -25,7 +25,10 @@ def test_filled_array_with_string_dtype(self):
 
         # Create a minimal entity for testing
         entity = Entity(
-            key="person", plural="people", label="Person", doc="Test person entity"
+            key="person",
+            plural="people",
+            label="Person",
+            doc="Test person entity",
         )
 
         # Create a population with some count
@@ -54,14 +57,19 @@ def test_filled_array_with_pyarrow_string_dtype(self):
         from policyengine_core.entities import Entity
 
         entity = Entity(
-            key="person", plural="people", label="Person", doc="Test person entity"
+            key="person",
+            plural="people",
+            label="Person",
+            doc="Test person entity",
         )
         population = Population(entity)
         population.count = 5
 
         # PyArrow string dtype (proper way to create it)
         arrow_string_dtype = pd.ArrowDtype(pa.string())
-        result = population.filled_array("test_value", dtype=arrow_string_dtype)
+        result = population.filled_array(
+            "test_value", dtype=arrow_string_dtype
+        )
         assert len(result) == 5
 
 
@@ -164,5 +172,9 @@ def test_is_pandas_extension_dtype(self):
         assert isinstance(pd.StringDtype(), pd.api.extensions.ExtensionDtype)
 
         # numpy dtypes are not
-        assert not isinstance(np.dtype("float64"), pd.api.extensions.ExtensionDtype)
-        assert not isinstance(np.dtype("object"), pd.api.extensions.ExtensionDtype)
+        assert not isinstance(
+            np.dtype("float64"), pd.api.extensions.ExtensionDtype
+        )
+        assert not isinstance(
+            np.dtype("object"), pd.api.extensions.ExtensionDtype
+        )

From 3552fd64ab8062ce514ce445b026933d2babb136 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Sat, 24 Jan 2026 10:30:06 -0500
Subject: [PATCH 5/5] Format with black 26.1.0 to match CI

---
 policyengine_core/charts/formatting.py      | 1 -
 policyengine_core/populations/population.py | 8 ++------
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/policyengine_core/charts/formatting.py b/policyengine_core/charts/formatting.py
index 8fcf536a..182774c0 100644
--- a/policyengine_core/charts/formatting.py
+++ b/policyengine_core/charts/formatting.py
@@ -1,7 +1,6 @@
 import plotly.graph_objects as go
 from IPython.display import HTML
 
-
 GREEN = "#29d40f"
 LIGHT_GREEN = "#C5E1A5"
 DARK_GREEN = "#558B2F"
diff --git a/policyengine_core/populations/population.py b/policyengine_core/populations/population.py
index c2d9d0f4..988485ec 100644
--- a/policyengine_core/populations/population.py
+++ b/policyengine_core/populations/population.py
@@ -78,17 +78,13 @@ def check_period_validity(
         if period is None:
             stack = traceback.extract_stack()
             filename, line_number, function_name, line_of_code = stack[-3]
-            raise ValueError(
-                """
+            raise ValueError("""
 You requested computation of variable "{}", but you did not specify on which period in "{}:{}":
     {}
 When you request the computation of a variable within a formula, you must always specify the period as the second parameter. The convention is to call this parameter "period". For example:
     computed_salary = person('salary', period).
 See more information at <https://openfisca.org/doc/coding-the-legislation/35_periods.html#periods-in-variable-definition>.
-""".format(
-                    variable_name, filename, line_number, line_of_code
-                )
-            )
+""".format(variable_name, filename, line_number, line_of_code))
 
     def __call__(
         self,