PolicyEngine · MaxGhenis · Jan 24, 2026 · Jan 24, 2026 · Jan 24, 2026 · Jan 24, 2026
diff --git a/changelog_entry.yaml b/changelog_entry.yaml
@@ -0,0 +1,4 @@
+- bump: patch
+  changes:
+    fixed:
+      - Fixed pandas 3.0 compatibility issues with StringDtype and StringArray
diff --git a/policyengine_core/charts/formatting.py b/policyengine_core/charts/formatting.py
@@ -1,7 +1,6 @@
 import plotly.graph_objects as go
 from IPython.display import HTML
 
-
 GREEN = "#29d40f"
 LIGHT_GREEN = "#C5E1A5"
 DARK_GREEN = "#558B2F"

diff --git a/policyengine_core/parameters/vectorial_parameter_node_at_instant.py b/policyengine_core/parameters/vectorial_parameter_node_at_instant.py
@@ -196,7 +196,10 @@ def __getitem__(self, key: str) -> Any:
         if isinstance(key, str):
             return self.__getattr__(key)
         # If the key is a vector, e.g. ['zone_1', 'zone_2', 'zone_1']
-        elif isinstance(key, numpy.ndarray):
+        # Convert pandas arrays (e.g., StringArray from pandas 3) to numpy
+        if hasattr(key, "__array__") and not isinstance(key, numpy.ndarray):
+            key = numpy.asarray(key)
+        if isinstance(key, numpy.ndarray):
             if not numpy.issubdtype(key.dtype, numpy.str_):
                 # In case the key is not a string vector, stringify it
                 if key.dtype == object and issubclass(type(key[0]), Enum):

diff --git a/policyengine_core/populations/population.py b/policyengine_core/populations/population.py
@@ -41,6 +41,12 @@ def empty_array(self) -> numpy.ndarray:
         return numpy.zeros(self.count)
 
     def filled_array(self, value: Any, dtype: Any = None) -> numpy.ndarray:
+        import pandas as pd
+
+        # Handle pandas extension dtypes (e.g., StringDtype in pandas 3)
+        # numpy.full() cannot handle these, so convert to object dtype
+        if isinstance(dtype, pd.api.extensions.ExtensionDtype):
+            dtype = object
         return numpy.full(self.count, value, dtype)
 
     def __getattr__(self, attribute: str) -> Any:
@@ -72,17 +78,13 @@ def check_period_validity(
         if period is None:
             stack = traceback.extract_stack()
             filename, line_number, function_name, line_of_code = stack[-3]
-            raise ValueError(
-                """
+            raise ValueError("""
 You requested computation of variable "{}", but you did not specify on which period in "{}:{}":
     {}
 When you request the computation of a variable within a formula, you must always specify the period as the second parameter. The convention is to call this parameter "period". For example:
     computed_salary = person('salary', period).
 See more information at <https://openfisca.org/doc/coding-the-legislation/35_periods.html#periods-in-variable-definition>.
-""".format(
-                    variable_name, filename, line_number, line_of_code
-                )
-            )
+""".format(variable_name, filename, line_number, line_of_code))
 
     def __call__(
         self,

diff --git a/tests/core/test_pandas3_compatibility.py b/tests/core/test_pandas3_compatibility.py
@@ -0,0 +1,180 @@
+"""
+Tests for pandas 3.0.0 compatibility.
+
+These tests verify that policyengine-core works correctly with pandas 3.0.0,
+which introduces:
+1. PyArrow-backed strings as default (StringDtype)
+2. Copy-on-Write by default
+"""
+
+import numpy as np
+import pandas as pd
+import pytest
+
+
+class TestFilledArrayWithStringDtype:
+    """Test that filled_array works with pandas StringDtype."""
+
+    def test_filled_array_with_string_dtype(self):
+        """
+        In pandas 3.0.0, string columns use StringDtype by default.
+        numpy.full() cannot handle StringDtype, so we need to handle this case.
+        """
+        from policyengine_core.populations.population import Population
+        from policyengine_core.entities import Entity
+
+        # Create a minimal entity for testing
+        entity = Entity(
+            key="person",
+            plural="people",
+            label="Person",
+            doc="Test person entity",
+        )
+
+        # Create a population with some count
+        population = Population(entity)
+        population.count = 5
+
+        # Test with regular numpy dtype - should work
+        result = population.filled_array("test_value", dtype=object)
+        assert len(result) == 5
+        assert all(v == "test_value" for v in result)
+
+        # Test with pandas StringDtype - this is what pandas 3 uses by default
+        # This should NOT raise an error
+        string_dtype = pd.StringDtype()
+        result = population.filled_array("test_value", dtype=string_dtype)
+        assert len(result) == 5
+        assert all(v == "test_value" for v in result)
+
+    def test_filled_array_with_pyarrow_string_dtype(self):
+        """
+        Test with PyArrow-backed string dtype, which pandas 3 uses by default.
+        """
+        pa = pytest.importorskip("pyarrow")
+
+        from policyengine_core.populations.population import Population
+        from policyengine_core.entities import Entity
+
+        entity = Entity(
+            key="person",
+            plural="people",
+            label="Person",
+            doc="Test person entity",
+        )
+        population = Population(entity)
+        population.count = 5
+
+        # PyArrow string dtype (proper way to create it)
+        arrow_string_dtype = pd.ArrowDtype(pa.string())
+        result = population.filled_array(
+            "test_value", dtype=arrow_string_dtype
+        )
+        assert len(result) == 5
+
+
+class TestParameterLookupWithStringArray:
+    """Test that parameter lookup works with pandas StringArray."""
+
+    def test_parameter_node_getitem_with_string_array(self):
+        """
+        In pandas 3.0.0, series.values.astype(str) returns a StringArray
+        instead of a numpy array. ParameterNodeAtInstant.__getitem__ should
+        handle this.
+        """
+        # Create a pandas StringArray (what pandas 3 returns)
+        string_array = pd.array(["value1", "value2", "value3"], dtype="string")
+
+        # Verify it's a StringArray (not numpy array)
+        assert not isinstance(string_array, np.ndarray)
+        assert hasattr(string_array, "__array__")
+
+        # Convert to numpy - this is what the fix should do
+        numpy_array = np.asarray(string_array)
+        assert isinstance(numpy_array, np.ndarray)
+
+    def test_vectorial_parameter_node_with_string_array(self):
+        """
+        VectorialParameterNodeAtInstant.__getitem__ should handle pandas
+        StringArray by converting it to numpy array.
+        """
+        from policyengine_core.parameters.vectorial_parameter_node_at_instant import (
+            VectorialParameterNodeAtInstant,
+        )
+
+        # Create a simple vectorial node for testing with proper structure
+        vector = np.array(
+            [(1.0, 2.0)],
+            dtype=[("zone_1", "float"), ("zone_2", "float")],
+        ).view(np.recarray)
+
+        node = VectorialParameterNodeAtInstant("test", vector, "2024-01-01")
+
+        # Test with numpy array - should work
+        key_numpy = np.array(["zone_1", "zone_2"])
+        result_numpy = node[key_numpy]
+        assert len(result_numpy) == 2
+
+        # Test with pandas StringArray - this is what pandas 3 returns
+        key_string_array = pd.array(["zone_1", "zone_2"], dtype="string")
+
+        # This should NOT raise TypeError: unhashable type: 'StringArray'
+        # The node should accept StringArray by converting to numpy
+        result_string_array = node[key_string_array]
+        assert len(result_string_array) == 2
+
+        # Results should be the same
+        np.testing.assert_array_equal(result_numpy, result_string_array)
+
+
+class TestMicroSeriesCompatibility:
+    """Test that MicroSeries operations work with pandas 3."""
+
+    def test_series_subclass_preserved(self):
+        """
+        Pandas 3.0.0 may change how Series subclasses are handled.
+        Operations should return the subclass, not plain Series.
+        """
+        # This test documents expected behavior that may break in pandas 3
+        df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+
+        # Test that operations preserve Series type
+        result = df["a"] + df["b"]
+        assert isinstance(result, pd.Series)
+
+        # With pandas 3, some operations may return different types
+        result = df["a"].astype(str)
+        # In pandas 3, this might return StringArray-backed Series
+        assert isinstance(result, pd.Series)
+
+
+class TestStringDtypeConversion:
+    """Test utilities for converting pandas StringDtype to numpy-compatible types."""
+
+    def test_convert_string_dtype_to_object(self):
+        """
+        When pandas StringDtype is passed to numpy functions,
+        we should convert it to object dtype.
+        """
+        string_dtype = pd.StringDtype()
+
+        # numpy.full doesn't understand StringDtype
+        with pytest.raises(TypeError):
+            np.full(5, "test", dtype=string_dtype)
+
+        # But it works with object dtype
+        result = np.full(5, "test", dtype=object)
+        assert len(result) == 5
+
+    def test_is_pandas_extension_dtype(self):
+        """Test detection of pandas extension dtypes."""
+        # pandas StringDtype is an ExtensionDtype
+        assert isinstance(pd.StringDtype(), pd.api.extensions.ExtensionDtype)
+
+        # numpy dtypes are not
+        assert not isinstance(
+            np.dtype("float64"), pd.api.extensions.ExtensionDtype
+        )
+        assert not isinstance(
+            np.dtype("object"), pd.api.extensions.ExtensionDtype
+        )