diff --git a/openeo/extra/job_management/_manager.py b/openeo/extra/job_management/_manager.py index 5b0c1dba1..60e4281e6 100644 --- a/openeo/extra/job_management/_manager.py +++ b/openeo/extra/job_management/_manager.py @@ -94,6 +94,25 @@ def normalize_df(self, df: pd.DataFrame) -> pd.DataFrame: """ new_columns = {col: req.default for (col, req) in self._requirements.items() if col not in df.columns} df = df.assign(**new_columns) + # Apply dtype conversions to ensure compatibility with pandas 3's stricter type checking. + # This is especially important for columns that may contain NaN values but need to be strings. + # Only convert columns where the current dtype doesn't match the required dtype. + # For string columns (dtype="str"), we convert to "object" to maintain backward compatibility + # (pandas 3.x "str" creates StringDtype, pandas 2.x "str" was an alias for object). + dtype_conversions = {} + for col, req in self._requirements.items(): + if col in df.columns: + current_dtype = str(df[col].dtype) + required_dtype = req.dtype + # Only convert if the current dtype is different and not already compatible + if required_dtype == "str" and current_dtype not in {"object", "string", "str"}: + # Convert to object for backward compatibility + dtype_conversions[col] = "object" + elif required_dtype not in {"object", "str"} and current_dtype != required_dtype: + # For other dtypes (e.g., float64), convert as specified + dtype_conversions[col] = required_dtype + if dtype_conversions: + df = df.astype(dtype_conversions) return df def dtype_mapping(self) -> Dict[str, str]: diff --git a/openeo/udf/run_code.py b/openeo/udf/run_code.py index fc673dbed..0483126ff 100644 --- a/openeo/udf/run_code.py +++ b/openeo/udf/run_code.py @@ -77,7 +77,16 @@ def _get_annotation_str(annotation: Union[str, type]) -> str: def _annotation_is_pandas_series(annotation) -> bool: - return annotation in {pandas.Series, _get_annotation_str(pandas.Series)} + # Support both pandas 2.x ("pandas.core.series.Series") and pandas 3.x ("pandas.Series") + # We explicitly list both paths to ensure cross-version compatibility: + # - UDFs written with pandas 2.x annotations should work in pandas 3.x environments + # - UDFs written with pandas 3.x annotations should work in pandas 2.x environments + return annotation in { + pandas.Series, + _get_annotation_str(pandas.Series), # Current pandas version's path + "pandas.core.series.Series", # Explicit pandas 2.x path for cross-version support + "pandas.Series", # Explicit pandas 3.x path for cross-version support + } def _annotation_is_udf_datacube(annotation) -> bool: diff --git a/setup.py b/setup.py index 30dcce02e..61f4d7cca 100644 --- a/setup.py +++ b/setup.py @@ -80,7 +80,7 @@ "shapely>=1.6.4", "numpy>=1.17.0", "xarray>=0.12.3,<2025.01.2", # TODO #721 xarray non-nanosecond support - "pandas>0.20.0,<3.0.0", # TODO pandas 3 compatibility https://github.com/Open-EO/openeo-python-client/issues/856 + "pandas>0.20.0", # pandas 3 compatibility added # TODO #578: pystac 1.5.0 is highest version available for lowest Python version we still support (3.7). "pystac>=1.5.0", "deprecated>=1.2.12", diff --git a/tests/udf/test_run_code.py b/tests/udf/test_run_code.py index 2579b574e..15e71ddae 100644 --- a/tests/udf/test_run_code.py +++ b/tests/udf/test_run_code.py @@ -28,7 +28,10 @@ @pytest.mark.parametrize(["annotation", "expected"], [ ("str", "str"), - (pandas.Series, "pandas.core.series.Series"), + # pandas.Series module path changed in pandas 3.0: + # pandas 2.x: "pandas.core.series.Series" + # pandas 3.x: "pandas.Series" + (pandas.Series, _get_annotation_str(pandas.Series)), (XarrayDataCube, "openeo.udf.xarraydatacube.XarrayDataCube"), (UdfData, "openeo.udf.udf_data.UdfData"), (str, "str"), @@ -40,7 +43,9 @@ def test_get_annotation_str(annotation, expected): def test_annotation_is_pandas_series(): assert _annotation_is_pandas_series(pandas.Series) is True - assert _annotation_is_pandas_series("pandas.core.series.Series") is True + # Support both pandas 2.x and 3.x module paths + assert _annotation_is_pandas_series("pandas.core.series.Series") is True # pandas 2.x + assert _annotation_is_pandas_series("pandas.Series") is True # pandas 3.x def test_annotation_is_udf_datacube():