Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions openeo/extra/job_management/_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,25 @@ def normalize_df(self, df: pd.DataFrame) -> pd.DataFrame:
"""
new_columns = {col: req.default for (col, req) in self._requirements.items() if col not in df.columns}
df = df.assign(**new_columns)
# Apply dtype conversions to ensure compatibility with pandas 3's stricter type checking.
# This is especially important for columns that may contain NaN values but need to be strings.
# Only convert columns where the current dtype doesn't match the required dtype.
# For string columns (dtype="str"), we convert to "object" to maintain backward compatibility
# (pandas 3.x "str" creates StringDtype, pandas 2.x "str" was an alias for object).
dtype_conversions = {}
for col, req in self._requirements.items():
if col in df.columns:
current_dtype = str(df[col].dtype)
required_dtype = req.dtype
# Only convert if the current dtype is different and not already compatible
if required_dtype == "str" and current_dtype not in {"object", "string", "str"}:
# Convert to object for backward compatibility
dtype_conversions[col] = "object"
elif required_dtype not in {"object", "str"} and current_dtype != required_dtype:
# For other dtypes (e.g., float64), convert as specified
dtype_conversions[col] = required_dtype
if dtype_conversions:
df = df.astype(dtype_conversions)
return df

def dtype_mapping(self) -> Dict[str, str]:
Expand Down
11 changes: 10 additions & 1 deletion openeo/udf/run_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,16 @@ def _get_annotation_str(annotation: Union[str, type]) -> str:


def _annotation_is_pandas_series(annotation) -> bool:
    """
    Check whether the given annotation refers to ``pandas.Series``.

    The annotation may be the ``pandas.Series`` class itself or its
    fully qualified name as a string. The module path of ``pandas.Series``
    differs between pandas 2.x ("pandas.core.series.Series") and
    pandas 3.x ("pandas.Series"). Both are accepted regardless of the
    installed pandas version, so that:

    - UDFs written with pandas 2.x annotations work in pandas 3.x environments
    - UDFs written with pandas 3.x annotations work in pandas 2.x environments

    :param annotation: annotation object (class) or annotation string to check
    :return: True if the annotation denotes ``pandas.Series``, False otherwise
    """
    return annotation in {
        pandas.Series,
        _get_annotation_str(pandas.Series),  # Path as reported by the installed pandas version
        "pandas.core.series.Series",  # Explicit pandas 2.x path for cross-version support
        "pandas.Series",  # Explicit pandas 3.x path for cross-version support
    }


def _annotation_is_udf_datacube(annotation) -> bool:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
"shapely>=1.6.4",
"numpy>=1.17.0",
"xarray>=0.12.3,<2025.01.2", # TODO #721 xarray non-nanosecond support
"pandas>0.20.0,<3.0.0", # TODO pandas 3 compatibility https://github.com/Open-EO/openeo-python-client/issues/856
"pandas>0.20.0", # pandas 3 compatibility added
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need for a comment here anymore once the issue is fixed.

# TODO #578: pystac 1.5.0 is highest version available for lowest Python version we still support (3.7).
"pystac>=1.5.0",
"deprecated>=1.2.12",
Expand Down
9 changes: 7 additions & 2 deletions tests/udf/test_run_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@

@pytest.mark.parametrize(["annotation", "expected"], [
("str", "str"),
(pandas.Series, "pandas.core.series.Series"),
# pandas.Series module path changed in pandas 3.0:
# pandas 2.x: "pandas.core.series.Series"
# pandas 3.x: "pandas.Series"
(pandas.Series, _get_annotation_str(pandas.Series)),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test case no longer tests anything, as it basically resolves to

    assert _get_annotation_str(pandas.Series) == _get_annotation_str(pandas.Series)

Change this to use `dirty_equals.IsOneOf` instead.

(XarrayDataCube, "openeo.udf.xarraydatacube.XarrayDataCube"),
(UdfData, "openeo.udf.udf_data.UdfData"),
(str, "str"),
Expand All @@ -40,7 +43,9 @@ def test_get_annotation_str(annotation, expected):

def test_annotation_is_pandas_series():
    """
    The ``pandas.Series`` class and both its pandas 2.x and 3.x
    string paths must be recognized; unrelated annotations must not.
    """
    assert _annotation_is_pandas_series(pandas.Series) is True
    # Support both pandas 2.x and 3.x module paths
    assert _annotation_is_pandas_series("pandas.core.series.Series") is True  # pandas 2.x
    assert _annotation_is_pandas_series("pandas.Series") is True  # pandas 3.x
    # Negative case: guards against an implementation that accepts everything
    assert _annotation_is_pandas_series("str") is False


def test_annotation_is_udf_datacube():
Expand Down