Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
- bump: patch
  changes:
    fixed:
    - "Fixed pandas 3.0 compatibility in Enum.encode() by using positional access (.iloc[0]) for pandas Series instead of label-based access (array[0]), which fails with KeyError when Series has a non-integer index (fixes #427)"
9 changes: 8 additions & 1 deletion policyengine_core/enums/enum.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,14 @@ def encode(cls, array: Union[EnumArray, np.ndarray]) -> EnumArray:
return array

# Handle Enum item arrays by extracting indices directly
if len(array) > 0 and isinstance(array[0], Enum):
# Use .iloc[0] for pandas Series to avoid KeyError with non-integer index
# (pandas 3.0 uses StringDtype by default, causing array[0] to do
# label-based lookup instead of positional access)
if len(array) > 0:
first_elem = array.iloc[0] if hasattr(array, "iloc") else array[0]
else:
first_elem = None
if first_elem is not None and isinstance(first_elem, Enum):
indices = np.array(
[item.index for item in array], dtype=ENUM_ARRAY_DTYPE
)
Expand Down
56 changes: 56 additions & 0 deletions tests/core/enums/test_enum.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,59 @@ class Sample(Enum):

# Empty string should be in the error message (represented as '')
assert "''" in str(exc_info.value) or '""' in str(exc_info.value)


def test_enum_encode_pandas_series_with_enum_items():
    """
    Encode a default-indexed pandas Series whose values are Enum members.

    Regression coverage for GitHub issue #427: ``encode()`` has to look at
    the first element of a Series in order to detect Enum items, and that
    peek must succeed when the input is a pandas Series rather than a
    NumPy array.
    """
    import pandas as pd

    class Sample(Enum):
        MAXWELL = "maxwell"
        DWORKIN = "dworkin"

    # A Series of Enum members with the default index (the shape of input
    # reported from the policyengine-us county variable).
    members = [Sample.MAXWELL, Sample.DWORKIN, Sample.MAXWELL]
    result = Sample.encode(pd.Series(members))

    assert len(result) == 3
    assert isinstance(result, EnumArray)
    # The encoded values are expected to be integer indices.
    assert result.dtype.kind == "i"
    # Expected indices: MAXWELL -> 0, DWORKIN -> 1.
    assert list(result) == [0, 1, 0]


def test_enum_encode_pandas_series_with_string_index():
    """
    Encode a pandas Series of Enum members that carries a string index.

    With labels "a", "b", "c" there is no label ``0``, so a label-based
    ``array[0]`` lookup raises ``KeyError``; the first element has to be
    read positionally (``.iloc[0]``) for encoding to succeed.
    """
    import pandas as pd

    class Sample(Enum):
        MAXWELL = "maxwell"
        DWORKIN = "dworkin"

    # Non-integer labels make the label-vs-position distinction observable.
    members = [Sample.MAXWELL, Sample.DWORKIN, Sample.MAXWELL]
    labelled = pd.Series(members, index=["a", "b", "c"])

    result = Sample.encode(labelled)

    assert len(result) == 3
    assert isinstance(result, EnumArray)
    # Expected indices: MAXWELL -> 0, DWORKIN -> 1.
    assert list(result) == [0, 1, 0]