From d57165d2300c5f73f77840bf65f8b4bce4680c85 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sun, 26 Jan 2025 22:50:41 -0600 Subject: [PATCH 1/9] fewer custom scalar and array types --- .../src/geoarrow/pyarrow/_array.py | 43 +------------------ .../src/geoarrow/pyarrow/_scalar.py | 38 +--------------- geoarrow-pyarrow/tests/test_pyarrow.py | 4 +- 3 files changed, 4 insertions(+), 81 deletions(-) diff --git a/geoarrow-pyarrow/src/geoarrow/pyarrow/_array.py b/geoarrow-pyarrow/src/geoarrow/pyarrow/_array.py index 64cfd26..1437623 100644 --- a/geoarrow-pyarrow/src/geoarrow/pyarrow/_array.py +++ b/geoarrow-pyarrow/src/geoarrow/pyarrow/_array.py @@ -78,49 +78,8 @@ def __repr__(self): return f"{type_name}:{repr(self.type)}[{len(self)}]\n{items_str}".strip() -class PointArray(GeometryExtensionArray): - pass - - -class LinestringArray(GeometryExtensionArray): - pass - - -class PolygonArray(GeometryExtensionArray): - pass - - -class MultiPointArray(GeometryExtensionArray): - pass - - -class MultiLinestringArray(GeometryExtensionArray): - pass - - -class MultiPolygonArray(GeometryExtensionArray): - pass - - def array_cls_from_name(name): - if name == "geoarrow.wkb": - return GeometryExtensionArray - elif name == "geoarrow.wkt": - return GeometryExtensionArray - elif name == "geoarrow.point": - return PointArray - elif name == "geoarrow.linestring": - return LinestringArray - elif name == "geoarrow.polygon": - return PolygonArray - elif name == "geoarrow.multipoint": - return MultiPointArray - elif name == "geoarrow.multilinestring": - return MultiLinestringArray - elif name == "geoarrow.multipolygon": - return MultiPolygonArray - else: - raise ValueError(f'Expected valid extension name but got "{name}"') + return GeometryExtensionArray # Inject array_cls_from_name exactly once to avoid circular import diff --git a/geoarrow-pyarrow/src/geoarrow/pyarrow/_scalar.py b/geoarrow-pyarrow/src/geoarrow/pyarrow/_scalar.py index ef30631..a491593 100644 --- a/geoarrow-pyarrow/src/geoarrow/pyarrow/_scalar.py +++ b/geoarrow-pyarrow/src/geoarrow/pyarrow/_scalar.py @@ -72,49 +72,13 @@ def wkb(self): return self.value.as_py() -class PointScalar(GeometryExtensionScalar): - pass - - -class LinestringScalar(GeometryExtensionScalar): - pass - - -class PolygonScalar(GeometryExtensionScalar): - pass - - -class MultiPointScalar(GeometryExtensionScalar): - pass - - -class MultiLinestringScalar(GeometryExtensionScalar): - pass - - -class MultiPolygonScalar(GeometryExtensionScalar): - pass - - def scalar_cls_from_name(name): if name == "geoarrow.wkb": return WkbScalar elif name == "geoarrow.wkt": return WktScalar - elif name == "geoarrow.point": - return PointScalar - elif name == "geoarrow.linestring": - return LinestringScalar - elif name == "geoarrow.polygon": - return PolygonScalar - elif name == "geoarrow.multipoint": - return MultiPointScalar - elif name == "geoarrow.multilinestring": - return MultiLinestringScalar - elif name == "geoarrow.multipolygon": - return MultiPolygonScalar else: - raise ValueError(f'Expected valid extension name but got "{name}"') + return GeometryExtensionScalar # Inject array_cls_from_name exactly once to avoid circular import diff --git a/geoarrow-pyarrow/tests/test_pyarrow.py b/geoarrow-pyarrow/tests/test_pyarrow.py index 59bcd64..eb6a490 100644 --- a/geoarrow-pyarrow/tests/test_pyarrow.py +++ b/geoarrow-pyarrow/tests/test_pyarrow.py @@ -171,7 +171,7 @@ def test_scalar_geoarrow(): array = ga.as_geoarrow(["POINT (0 1)"]) assert array[0].wkt == "POINT (0 1)" assert array[0].wkb == ga.as_wkb(array).storage[0].as_py() - assert repr(array[0]).startswith("PointScalar") + assert repr(array[0]).startswith("GeometryExtensionScalar") def test_scalar_repr(): @@ -227,7 +227,7 @@ def test_kernel_as(): out = kernel.push(array) assert out.type.extension_name == "geoarrow.point" assert out.type.crs.to_json_dict() == types.OGC_CRS84.to_json_dict() - assert isinstance(out, _array.PointArray) + assert isinstance(out, _array.GeometryExtensionArray) def test_kernel_format(): From 7e7495518bd86713e866c2ae6717cf9cf3fae899 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 28 Jan 2025 15:19:10 -0600 Subject: [PATCH 2/9] use new box format --- geoarrow-pandas/src/geoarrow/pandas/lib.py | 8 ++++---- geoarrow-pyarrow/src/geoarrow/pyarrow/_compute.py | 15 ++++++++------- geoarrow-pyarrow/src/geoarrow/pyarrow/dataset.py | 6 +++--- geoarrow-pyarrow/src/geoarrow/pyarrow/io.py | 1 + 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/geoarrow-pandas/src/geoarrow/pandas/lib.py b/geoarrow-pandas/src/geoarrow/pandas/lib.py index dc65e96..b754388 100644 --- a/geoarrow-pandas/src/geoarrow/pandas/lib.py +++ b/geoarrow-pandas/src/geoarrow/pandas/lib.py @@ -507,18 +507,18 @@ def bounds(self): """See :func:`geoarrow.pyarrow.box`""" array_or_chunked = _ga.box(self._obj) if isinstance(array_or_chunked, _pa.ChunkedArray): - flattened = [chunk.flatten() for chunk in array_or_chunked.chunks] + flattened = [chunk.storage.flatten() for chunk in array_or_chunked.chunks] seriesish = [ _pa.chunked_array(item, _pa.float64()) for item in zip(*flattened) ] else: - seriesish = array_or_chunked.flatten() + seriesish = array_or_chunked.storage.flatten() return _pd.DataFrame( { "xmin": seriesish[0], - "xmax": seriesish[1], - "ymin": seriesish[2], + "xmax": seriesish[2], + "ymin": seriesish[1], "ymax": seriesish[3], }, index=self._obj.index, diff --git a/geoarrow-pyarrow/src/geoarrow/pyarrow/_compute.py b/geoarrow-pyarrow/src/geoarrow/pyarrow/_compute.py index de7db0f..ca6eabf 100644 --- a/geoarrow-pyarrow/src/geoarrow/pyarrow/_compute.py +++ b/geoarrow-pyarrow/src/geoarrow/pyarrow/_compute.py @@ -338,10 +338,11 @@ def make_point(x, y, z=None, m=None, crs=None): def _box_point_struct(storage): arrays = storage.flatten() - return pa.StructArray.from_arrays( - [arrays[0], arrays[0], arrays[1], arrays[1]], - names=["xmin", "xmax", "ymin", "ymax"], + box_storage = pa.StructArray.from_arrays( + [arrays[0], arrays[1], arrays[0], arrays[1]], + names=["xmin", "ymin", "xmax", "ymax"], ) + return _type.types.box().to_pyarrow().wrap_array(box_storage) def box(obj): @@ -399,15 +400,15 @@ def _box_agg_point_struct(arrays): out = [list(pc.min_max(array).values()) for array in arrays] out_dict = { "xmin": out[0][0].as_py(), - "xmax": out[0][1].as_py(), "ymin": out[1][0].as_py(), + "xmax": out[0][1].as_py(), "ymax": out[1][1].as_py(), } # Apparently pyarrow reorders dict keys when inferring scalar types? - return pa.scalar( - out_dict, pa.struct([(nm, pa.float64()) for nm in out_dict.keys()]) - ) + storage_type = pa.struct([(nm, pa.float64()) for nm in out_dict.keys()]) + storage_array = pa.array([out_dict], storage_type) + return _type.types.box().to_pyarrow().wrap_array(storage_array)[0] def box_agg(obj): diff --git a/geoarrow-pyarrow/src/geoarrow/pyarrow/dataset.py b/geoarrow-pyarrow/src/geoarrow/pyarrow/dataset.py index ef64b7f..d50be05 100644 --- a/geoarrow-pyarrow/src/geoarrow/pyarrow/dataset.py +++ b/geoarrow-pyarrow/src/geoarrow/pyarrow/dataset.py @@ -213,7 +213,7 @@ def filter_fragments(self, target): if isinstance(target, str): target = [target] - target_box = box_agg(target) + target_box = box_agg(target).as_py() maybe_intersects = GeoDataset._index_box_intersects( self.index_fragments(), target_box, self.geometry_columns ) @@ -255,7 +255,7 @@ def _index_fragment(fragment, column, type): kernel = Kernel.box_agg(type) for batch in reader: kernel.push(batch.column(0)) - return kernel.finish() + return kernel.finish().storage @staticmethod def _index_fragments(fragments, columns, types, num_threads=None): @@ -295,7 +295,7 @@ def _index_fragments(fragments, columns, types, num_threads=None): @staticmethod def _index_box_intersects(index, box, columns): - xmin, xmax, ymin, ymax = box.as_py().values() + xmin, ymin, xmax, ymax = box.values() expressions = [] for col in columns: expr = ( diff --git a/geoarrow-pyarrow/src/geoarrow/pyarrow/io.py b/geoarrow-pyarrow/src/geoarrow/pyarrow/io.py index 593fdca..ab427b8 100644 --- a/geoarrow-pyarrow/src/geoarrow/pyarrow/io.py +++ b/geoarrow-pyarrow/src/geoarrow/pyarrow/io.py @@ -508,6 +508,7 @@ def geoparquet_encoding_geoarrow(): "MultiPoint", "MultiLineString", "MultiPolygon", + "GeometryCollection" ] _GEOPARQUET_DIMENSION_LABELS = [None, "", " Z", " M", " ZM"] From 4337f0beb16e08f9a433d4f353812b591e705215 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sat, 1 Feb 2025 21:10:31 -0600 Subject: [PATCH 3/9] fix deserialization of crs --- geoarrow-types/src/geoarrow/types/crs.py | 4 +++- geoarrow-types/src/geoarrow/types/type_spec.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/geoarrow-types/src/geoarrow/types/crs.py b/geoarrow-types/src/geoarrow/types/crs.py index 520b091..e0e422e 100644 --- a/geoarrow-types/src/geoarrow/types/crs.py +++ b/geoarrow-types/src/geoarrow/types/crs.py @@ -133,7 +133,9 @@ def __repr__(self) -> str: class StringCrs(Crs): def __init__(self, crs: Union[str, bytes]): - if isinstance(crs, bytes): + if isinstance(crs, str): + self._crs = crs + elif isinstance(crs, bytes): self._crs = crs.decode() else: self._crs = str(crs) diff --git a/geoarrow-types/src/geoarrow/types/type_spec.py b/geoarrow-types/src/geoarrow/types/type_spec.py index 3d07ce8..725f54d 100644 --- a/geoarrow-types/src/geoarrow/types/type_spec.py +++ b/geoarrow-types/src/geoarrow/types/type_spec.py @@ -272,7 +272,7 @@ def from_extension_metadata(extension_metadata: str): if "crs_type" in metadata and metadata["crs_type"] == "projjson": out_crs = crs.ProjJsonCrs(metadata["crs"]) else: - out_crs = crs.StringCrs(metadata["crs"]) + out_crs = crs.create(metadata["crs"]) return TypeSpec(edge_type=out_edges, crs=out_crs) From 208dc5449e81cfba99188e2a3f2f8fe88267a7d1 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sat, 1 Feb 2025 21:14:51 -0600 Subject: [PATCH 4/9] format --- geoarrow-pyarrow/src/geoarrow/pyarrow/io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geoarrow-pyarrow/src/geoarrow/pyarrow/io.py b/geoarrow-pyarrow/src/geoarrow/pyarrow/io.py index ab427b8..c4ffe92 100644 --- a/geoarrow-pyarrow/src/geoarrow/pyarrow/io.py +++ b/geoarrow-pyarrow/src/geoarrow/pyarrow/io.py @@ -508,7 +508,7 @@ def geoparquet_encoding_geoarrow(): "MultiPoint", "MultiLineString", "MultiPolygon", - "GeometryCollection" + "GeometryCollection", ] _GEOPARQUET_DIMENSION_LABELS = [None, "", " Z", " M", " ZM"] From baf6638273ac7a5e12d250327bdb2dacf61cb20a Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sat, 1 Feb 2025 22:16:19 -0600 Subject: [PATCH 5/9] maybe work with old geoarrow-c --- .../src/geoarrow/pyarrow/_kernel.py | 52 +++++++++++++++++-- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/geoarrow-pyarrow/src/geoarrow/pyarrow/_kernel.py b/geoarrow-pyarrow/src/geoarrow/pyarrow/_kernel.py index f61cfd6..0e091ea 100644 --- a/geoarrow-pyarrow/src/geoarrow/pyarrow/_kernel.py +++ b/geoarrow-pyarrow/src/geoarrow/pyarrow/_kernel.py @@ -2,21 +2,29 @@ import pyarrow as pa import pyarrow_hotfix as _ # noqa: F401 +from geoarrow.types import box as box_spec from geoarrow.pyarrow._type import GeometryExtensionType _lazy_lib = None +_geoarrow_c_version = None def _geoarrow_c(): - global _lazy_lib + global _lazy_lib, _geoarrow_c_version if _lazy_lib is None: try: - from geoarrow.c import lib + import geoarrow.c + except ImportError as e: raise ImportError("Requested operation requires geoarrow-c") from e - _lazy_lib = lib + _lazy_lib = geoarrow.c.lib + if hasattr(geoarrow.c, "__version_tuple__"): + _geoarrow_c_version = geoarrow.c.__version_tuple__ + else: + _geoarrow_c_version = (0, 1, 0) + return _lazy_lib @@ -109,11 +117,19 @@ def unique_geometry_types_agg(type_in): @staticmethod def box(type_in): - return Kernel("box", type_in) + kernel = Kernel("box", type_in) + if _geoarrow_c_version <= (0, 1, 3): + return BoxKernelCompat(kernel) + else: + return kernel @staticmethod def box_agg(type_in): - return Kernel("box_agg", type_in) + kernel = Kernel("box_agg", type_in) + if _geoarrow_c_version <= (0, 1, 3): + return BoxKernelCompat(kernel) + else: + return kernel @staticmethod def _pack_options(options): @@ -132,3 +148,29 @@ def _pack_options(options): bytes += v.encode("UTF-8") return bytes + + +class BoxKernelCompat: + """A wrapper around the "box" kernel that works for geoarrow-c 0.1. + This is mostly to ease the transition for geoarrow-python CI while + all the packages are being updated.""" + + def __init__(self, parent: Kernel): + self.parent = parent + self.type_out = box_spec().to_pyarrow().with_crs(parent._type_in.crs) + + def push(self, arr): + parent_result = self.parent.push(arr) + return ( + None if parent_result is None else self._old_box_to_new_box(parent_result) + ) + + def finish(self): + return self._old_box_to_new_box(self.parent.finish()) + + def _old_box_to_new_box(self, array): + xmin, xmax, ymin, ymax = array.flatten() + storage = pa.StructArray.from_arrays( + [xmin, ymin, xmax, ymax], names=["xmin", "ymin", "xmax", "ymax"] + ) + return self.type_out.wrap_array(storage) From 5487da63edee488bbc031f27050120c7352d89b1 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sat, 1 Feb 2025 22:55:56 -0600 Subject: [PATCH 6/9] check docs --- .../src/geoarrow/pyarrow/_array.py | 2 +- .../src/geoarrow/pyarrow/_compute.py | 24 +++++----- .../src/geoarrow/pyarrow/_scalar.py | 46 +++++++++++++++++++ .../src/geoarrow/pyarrow/dataset.py | 2 +- 4 files changed, 60 insertions(+), 14 deletions(-) diff --git a/geoarrow-pyarrow/src/geoarrow/pyarrow/_array.py b/geoarrow-pyarrow/src/geoarrow/pyarrow/_array.py index 1437623..179fad5 100644 --- a/geoarrow-pyarrow/src/geoarrow/pyarrow/_array.py +++ b/geoarrow-pyarrow/src/geoarrow/pyarrow/_array.py @@ -101,7 +101,7 @@ def array(obj, type_=None, *args, **kwargs) -> GeometryExtensionArray: GeometryExtensionArray:WktType(geoarrow.wkt)[1] >>> ga.as_geoarrow(["POINT (0 1)"]) - PointArray:PointType(geoarrow.point)[1] + GeometryExtensionArray:PointType(geoarrow.point)[1] """ # Convert GeoPandas to WKB diff --git a/geoarrow-pyarrow/src/geoarrow/pyarrow/_compute.py b/geoarrow-pyarrow/src/geoarrow/pyarrow/_compute.py index ca6eabf..2d9a074 100644 --- a/geoarrow-pyarrow/src/geoarrow/pyarrow/_compute.py +++ b/geoarrow-pyarrow/src/geoarrow/pyarrow/_compute.py @@ -251,7 +251,7 @@ def as_geoarrow(obj, type=None, coord_type=None, promote_multi=False): >>> import geoarrow.pyarrow as ga >>> ga.as_geoarrow(["POINT (0 1)", "MULTIPOINT Z (0 1 2, 4 5 6)"]) - MultiPointArray:MultiPointType(geoarrow.multipoint_z)[2] + GeometryExtensionArray:MultiPointType(geoarrow.multipoint_z)[2] """ @@ -307,7 +307,7 @@ def make_point(x, y, z=None, m=None, crs=None): >>> import geoarrow.pyarrow as ga >>> ga.make_point([1, 2, 3], [4, 5, 6]) - PointArray:PointType(geoarrow.point)[3] + GeometryExtensionArray:PointType(geoarrow.point)[3] @@ -351,7 +351,7 @@ def box(obj): >>> import geoarrow.pyarrow as ga >>> ga.box(["LINESTRING (0 10, 34 -1)"]).type - StructType(struct) + BoxType(geoarrow.box) >>> print(str(ga.box(["LINESTRING (0 10, 34 -1)"]))) -- is_valid: all not null -- child 0 type: double @@ -360,11 +360,11 @@ def box(obj): ] -- child 1 type: double [ - 34 + -1 ] -- child 2 type: double [ - -1 + 34 ] -- child 3 type: double [ @@ -418,7 +418,7 @@ def box_agg(obj): >>> import geoarrow.pyarrow as ga >>> ga.box_agg(["POINT (0 10)", "POINT (34 -1)"]) - + BoxScalar({'xmin': 0.0, 'ymin': -1.0, 'xmax': 34.0, 'ymax': 10.0}) """ obj = obj_as_array_or_chunked(obj) @@ -496,7 +496,7 @@ def with_coord_type(obj, coord_type): >>> import geoarrow.pyarrow as ga >>> ga.with_coord_type(["POINT (0 1)"], ga.CoordType.INTERLEAVED) - PointArray:PointType(interleaved geoarrow.point)[1] + GeometryExtensionArray:PointType(interleaved geoarrow.point)[1] """ return as_geoarrow(obj, coord_type=coord_type) @@ -538,10 +538,10 @@ def with_dimensions(obj, dimensions): >>> import geoarrow.pyarrow as ga >>> ga.with_dimensions(["POINT (0 1)"], ga.Dimensions.XYZM) - PointArray:PointType(geoarrow.point_zm)[1] + GeometryExtensionArray:PointType(geoarrow.point_zm)[1] >>> ga.with_dimensions(["POINT ZM (0 1 2 3)"], ga.Dimensions.XY) - PointArray:PointType(geoarrow.point)[1] + GeometryExtensionArray:PointType(geoarrow.point)[1] """ obj = as_geoarrow(obj) @@ -558,13 +558,13 @@ def with_geometry_type(obj, geometry_type): >>> import geoarrow.pyarrow as ga >>> ga.with_geometry_type(["POINT (0 1)"], ga.GeometryType.MULTIPOINT) - MultiPointArray:MultiPointType(geoarrow.multipoint)[1] + GeometryExtensionArray:MultiPointType(geoarrow.multipoint)[1] >>> ga.with_geometry_type(["MULTIPOINT (0 1)"], ga.GeometryType.POINT) - PointArray:PointType(geoarrow.point)[1] + GeometryExtensionArray:PointType(geoarrow.point)[1] >>> ga.with_geometry_type(["LINESTRING EMPTY", "POINT (0 1)"], ga.GeometryType.POINT) - PointArray:PointType(geoarrow.point)[2] + GeometryExtensionArray:PointType(geoarrow.point)[2] >>> ga.with_geometry_type(["MULTIPOINT (0 1, 2 3)"], ga.GeometryType.POINT) diff --git a/geoarrow-pyarrow/src/geoarrow/pyarrow/_scalar.py b/geoarrow-pyarrow/src/geoarrow/pyarrow/_scalar.py index a491593..6ca281d 100644 --- a/geoarrow-pyarrow/src/geoarrow/pyarrow/_scalar.py +++ b/geoarrow-pyarrow/src/geoarrow/pyarrow/_scalar.py @@ -1,3 +1,5 @@ +from typing import Optional + import pyarrow as pa import pyarrow_hotfix as _ # noqa: F401 from geoarrow.pyarrow._kernel import Kernel @@ -72,11 +74,55 @@ def wkb(self): return self.value.as_py() +class BoxScalar(GeometryExtensionScalar): + @property + def bounds(self) -> dict: + storage = self._array1().storage + return {k: v[0].as_py() for k, v in zip(storage.type.names, storage.flatten())} + + @property + def xmin(self) -> float: + return self.bounds["xmin"] + + @property + def ymin(self) -> float: + return self.bounds["ymin"] + + @property + def xmax(self) -> float: + return self.bounds["xmax"] + + @property + def ymax(self) -> float: + return self.bounds["ymax"] + + @property + def zmin(self) -> Optional[float]: + return self.bounds["zmin"] if "zmin" in self.bounds else None + + @property + def zmax(self) -> Optional[float]: + return self.bounds["zmax"] if "zmax" in self.bounds else None + + @property + def mmin(self) -> Optional[float]: + return self.bounds["mmin"] if "mmin" in self.bounds else None + + @property + def mmax(self) -> Optional[float]: + return self.bounds["mmax"] if "mmax" in self.bounds else None + + def __repr__(self) -> str: + return f"BoxScalar({self.bounds})" + + def scalar_cls_from_name(name): if name == "geoarrow.wkb": return WkbScalar elif name == "geoarrow.wkt": return WktScalar + elif name == "geoarrow.box": + return BoxScalar else: return GeometryExtensionScalar diff --git a/geoarrow-pyarrow/src/geoarrow/pyarrow/dataset.py b/geoarrow-pyarrow/src/geoarrow/pyarrow/dataset.py index d50be05..eaa7b3c 100644 --- a/geoarrow-pyarrow/src/geoarrow/pyarrow/dataset.py +++ b/geoarrow-pyarrow/src/geoarrow/pyarrow/dataset.py @@ -160,7 +160,7 @@ def index_fragments(self, num_threads=None): >>> table = pa.table([ga.array(["POINT (0.5 1.5)"])], ["geometry"]) >>> dataset = gads.dataset(table) >>> dataset.index_fragments().to_pylist() - [{'_fragment_index': 0, 'geometry': {'xmin': 0.5, 'xmax': 0.5, 'ymin': 1.5, 'ymax': 1.5}}] + [{'_fragment_index': 0, 'geometry': {'xmin': 0.5, 'ymin': 1.5, 'xmax': 0.5, 'ymax': 1.5}}] """ if self._index is None: self._index = self._build_index( From 8ca9316b079a8992a7b2ecae83c1147d953f51e9 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sun, 2 Feb 2025 16:26:55 -0600 Subject: [PATCH 7/9] scalar box test, fix repr for the box --- geoarrow-pyarrow/src/geoarrow/pyarrow/_array.py | 12 +++++++++++- geoarrow-pyarrow/tests/test_pyarrow.py | 14 ++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/geoarrow-pyarrow/src/geoarrow/pyarrow/_array.py b/geoarrow-pyarrow/src/geoarrow/pyarrow/_array.py index 179fad5..7782626 100644 --- a/geoarrow-pyarrow/src/geoarrow/pyarrow/_array.py +++ b/geoarrow-pyarrow/src/geoarrow/pyarrow/_array.py @@ -78,8 +78,18 @@ def __repr__(self): return f"{type_name}:{repr(self.type)}[{len(self)}]\n{items_str}".strip() +class BoxArray(GeometryExtensionArray): + def __repr__(self): + type_name = type(self).__name__ + items_str = "\n".join(repr(item.bounds) for item in self) + return f"{type_name}:{repr(self.type)}[{len(self)}]\n{items_str}".strip() + + def array_cls_from_name(name): - return GeometryExtensionArray + if name == "geoarrow.box": + return BoxArray + else: + return GeometryExtensionArray # Inject array_cls_from_name exactly once to avoid circular import diff --git a/geoarrow-pyarrow/tests/test_pyarrow.py b/geoarrow-pyarrow/tests/test_pyarrow.py index eb6a490..65a0b53 100644 --- a/geoarrow-pyarrow/tests/test_pyarrow.py +++ b/geoarrow-pyarrow/tests/test_pyarrow.py @@ -174,6 +174,20 @@ def test_scalar_geoarrow(): assert repr(array[0]).startswith("GeometryExtensionScalar") +def test_scalar_box(): + # The box kernel doesn't yet implement non XY boxes + array = ga.box(["LINESTRING ZM (0 1 2 3, 4 5 6 7)"]) + assert array[0].xmin == 0 + assert array[0].ymin == 1 + assert array[0].zmin is None + assert array[0].mmin is None + assert array[0].xmax == 4 + assert array[0].ymax == 5 + assert array[0].zmax is None + assert array[0].mmax is None + assert repr(array[0]).startswith("BoxScalar") + + def test_scalar_repr(): array = ga.array( ["LINESTRING (100000 100000, 100000 100000, 100000 100000, 100000 100000)"] From 7bdfdd0308b34413790c298863903f791e36bb44 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sun, 2 Feb 2025 17:23:31 -0600 Subject: [PATCH 8/9] maybe work with old pyarrow --- geoarrow-pyarrow/src/geoarrow/pyarrow/_scalar.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/geoarrow-pyarrow/src/geoarrow/pyarrow/_scalar.py b/geoarrow-pyarrow/src/geoarrow/pyarrow/_scalar.py index 6ca281d..30e8b21 100644 --- a/geoarrow-pyarrow/src/geoarrow/pyarrow/_scalar.py +++ b/geoarrow-pyarrow/src/geoarrow/pyarrow/_scalar.py @@ -78,7 +78,8 @@ class BoxScalar(GeometryExtensionScalar): @property def bounds(self) -> dict: storage = self._array1().storage - return {k: v[0].as_py() for k, v in zip(storage.type.names, storage.flatten())} + fields = [storage.type.field(i) for i in range(storage.type.num_fields)] + return {k.name: v[0].as_py() for k, v in zip(fields, storage.flatten())} @property def xmin(self) -> float: From 1b3602747936af8607ab4f939423ad4745c40b0f Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sun, 2 Feb 2025 21:42:15 -0600 Subject: [PATCH 9/9] tweaks and tests for printing --- .../src/geoarrow/pyarrow/_scalar.py | 16 ++++++++++++---- geoarrow-pyarrow/tests/test_pyarrow.py | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/geoarrow-pyarrow/src/geoarrow/pyarrow/_scalar.py b/geoarrow-pyarrow/src/geoarrow/pyarrow/_scalar.py index 30e8b21..2333e32 100644 --- a/geoarrow-pyarrow/src/geoarrow/pyarrow/_scalar.py +++ b/geoarrow-pyarrow/src/geoarrow/pyarrow/_scalar.py @@ -14,6 +14,16 @@ def __repr__(self): if pa_version[0] < 13: return super().__repr__() + # Pretty WKT printing needs geoarrow-c + try: + from geoarrow import c # noqa: F401 + except ImportError: + return ( + super().__repr__() + + "\n" + + "* pip install geoarrow-c for prettier printing of geometry scalars" + ) + max_width = 70 try: @@ -26,7 +36,7 @@ def __repr__(self): if len(string_formatted) >= max_width: string_formatted = string_formatted[: (max_width - 3)] + "..." - return f"{type(self).__name__}\n<{string_formatted}>" + return f"{type(self).__name__}:{repr(self.type)}\n<{string_formatted}>" def _array1(self): return self.type.wrap_array(pa.array([self.value])) @@ -77,9 +87,7 @@ def wkb(self): class BoxScalar(GeometryExtensionScalar): @property def bounds(self) -> dict: - storage = self._array1().storage - fields = [storage.type.field(i) for i in range(storage.type.num_fields)] - return {k.name: v[0].as_py() for k, v in zip(fields, storage.flatten())} + return self.as_py() @property def xmin(self) -> float: diff --git a/geoarrow-pyarrow/tests/test_pyarrow.py b/geoarrow-pyarrow/tests/test_pyarrow.py index 65a0b53..6396741 100644 --- a/geoarrow-pyarrow/tests/test_pyarrow.py +++ b/geoarrow-pyarrow/tests/test_pyarrow.py @@ -387,6 +387,24 @@ def test_multipolygon_array_from_geobuffers(): assert ga.as_wkt(arr)[0].as_py() == "MULTIPOLYGON (((1 4, 2 5, 3 6, 1 4)))" +def test_box_array_from_geobuffers(): + arr = ( + types.box() + .to_pyarrow() + .from_geobuffers( + b"\xff", + np.array([1.0, 2.0, 3.0]), + np.array([4.0, 5.0, 6.0]), + np.array([7.0, 8.0, 9.0]), + np.array([10.0, 11.0, 12.0]), + ) + ) + assert len(arr) == 3 + assert arr[2].bounds == {"xmin": 3.0, "ymin": 6.0, "xmax": 9.0, "ymax": 12.0} + assert "BoxArray" in repr(arr) + assert "'xmin': 3.0" in repr(arr) + + # Easier to test here because we have actual geoarrow arrays to parse def test_c_array_view(): arr = ga.as_geoarrow(["POLYGON ((0 0, 1 0, 0 1, 0 0))"])