Skip to content

Commit f705b51

Browse files
authored
feat(geoarrow-types): Add support for WKB and WKT view types (#68)
1 parent c61b1eb commit f705b51

9 files changed

Lines changed: 109 additions & 6 deletions

File tree

geoarrow-pyarrow/pyproject.toml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,10 @@ description = ""
2323
authors = [{name = "Dewey Dunnington", email = "dewey@dunnington.ca"}]
2424
license = {text = "Apache-2.0"}
2525
requires-python = ">=3.8"
26-
dependencies = ["pyarrow >= 14.0.2", "geoarrow-types"]
26+
dependencies = ["pyarrow >= 14.0.2", "geoarrow-types", "geoarrow-c"]
2727

2828
[project.optional-dependencies]
29-
test = ["pytest", "pandas", "numpy", "geopandas", "pyogrio", "pyproj", "geoarrow-c"]
30-
compute = ["geoarrow-c"]
29+
test = ["pytest", "pandas", "numpy", "geopandas", "pyogrio", "pyproj"]
3130

3231
[project.urls]
3332
homepage = "https://geoarrow.org"

geoarrow-pyarrow/src/geoarrow/pyarrow/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,10 @@
3030
MultiPolygonType,
3131
wkb,
3232
large_wkb,
33+
wkb_view,
3334
wkt,
3435
large_wkt,
36+
wkt_view,
3537
point,
3638
linestring,
3739
polygon,
@@ -92,8 +94,10 @@
9294
"MultiPolygonType",
9395
"wkb",
9496
"large_wkb",
97+
"wkb_view",
9598
"wkt",
9699
"large_wkt",
100+
"wkt_view",
97101
"point",
98102
"linestring",
99103
"polygon",

geoarrow-pyarrow/src/geoarrow/pyarrow/_type.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,18 @@ def large_wkb() -> WkbType:
4040
return WkbType.__arrow_ext_deserialize__(pa.large_binary(), b"")
4141

4242

43+
def wkb_view() -> WkbType:
44+
"""Well-known binary using binary views as the underlying storage.
45+
46+
>>> import geoarrow.pyarrow as ga
47+
>>> ga.wkb_view()
48+
WkbType(geoarrow.wkb)
49+
>>> ga.wkb_view().storage_type
50+
DataType(binary_view)
51+
"""
52+
return WkbType.__arrow_ext_deserialize__(pa.binary_view(), b"")
53+
54+
4355
def wkt() -> WktType:
4456
"""Well-known text with a maximum array size of 2 GB per chunk.
4557
@@ -64,6 +76,18 @@ def large_wkt() -> WktType:
6476
return WktType.__arrow_ext_deserialize__(pa.large_utf8(), b"")
6577

6678

79+
def wkt_view() -> WktType:
80+
"""Well-known text using string views as the underlying storage.
81+
82+
>>> import geoarrow.pyarrow as ga
83+
>>> ga.wkt_view()
84+
WktType(geoarrow.wkt)
85+
>>> ga.wkt_view().storage_type
86+
DataType(string_view)
87+
"""
88+
return WktType.__arrow_ext_deserialize__(pa.string_view(), b"")
89+
90+
6791
def point() -> PointType:
6892
"""Geoarrow-encoded point features.
6993

geoarrow-pyarrow/tests/test_pyarrow.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,10 @@ def test_type_with_crs_pyproj():
6868
def test_constructors():
6969
assert ga.wkb().extension_name == "geoarrow.wkb"
7070
assert ga.large_wkb().extension_name == "geoarrow.wkb"
71+
assert ga.wkb_view().extension_name == "geoarrow.wkb"
7172
assert ga.wkt().extension_name == "geoarrow.wkt"
7273
assert ga.large_wkt().extension_name == "geoarrow.wkt"
74+
assert ga.wkt_view().extension_name == "geoarrow.wkt"
7375
assert ga.point().extension_name == "geoarrow.point"
7476
assert ga.linestring().extension_name == "geoarrow.linestring"
7577
assert ga.polygon().extension_name == "geoarrow.polygon"
@@ -131,6 +133,24 @@ def test_array():
131133
assert array.type.storage_type == pa.large_binary()
132134

133135

136+
def test_array_view_types():
137+
# This one requires pyarrow >= 18, because that's when the necessary
138+
# cast() was added.
139+
try:
140+
pa.array(["foofy"]).cast(pa.string_view())
141+
except pa.lib.ArrowNotImplementedError:
142+
pytest.skip("ga.array() with view types requires pyarrow >= 18.0.0")
143+
144+
array = ga.array(["POINT (30 10)"], ga.wkt_view())
145+
assert array.type == ga.wkt_view()
146+
assert array.type.storage_type == pa.string_view()
147+
148+
wkb_item = b"\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3e\x40\x00\x00\x00\x00\x00\x00\x24\x40"
149+
array = ga.array([wkb_item], ga.wkb_view())
150+
assert array.type == ga.wkb_view()
151+
assert array.type.storage_type == pa.binary_view()
152+
153+
134154
def test_array_repr():
135155
array = ga.array(["POINT (30 10)"])
136156
array_repr = repr(array)

geoarrow-types/src/geoarrow/types/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
large_wkb,
1818
wkt,
1919
large_wkt,
20+
wkb_view,
21+
wkt_view,
2022
box,
2123
point,
2224
linestring,
@@ -42,6 +44,8 @@
4244
"large_wkb",
4345
"wkt",
4446
"large_wkt",
47+
"wkb_view",
48+
"wkt_view",
4549
"geoarrow",
4650
"box",
4751
"point",

geoarrow-types/src/geoarrow/types/constants.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ class Encoding(TypeSpecEnum):
7373
7474
>>> from geoarrow import types
7575
>>> types.Encoding.GEOARROW
76-
<Encoding.GEOARROW: 5>
76+
<Encoding.GEOARROW: 7>
7777
"""
7878

7979
UNSPECIFIED = 0
@@ -91,7 +91,13 @@ class Encoding(TypeSpecEnum):
9191
LARGE_WKT = 4
9292
"""Well-known text encoding with 64-bit offsets"""
9393

94-
GEOARROW = 5
94+
WKB_VIEW = 5
95+
"""Well-known binary encoding using binary views as a storage type"""
96+
97+
WKT_VIEW = 6
98+
"""Well-known text encoding using string views as a storage type"""
99+
100+
GEOARROW = 7
95101
"""GeoArrow native nested list encoding"""
96102

97103
def is_serialized(self):
@@ -100,6 +106,8 @@ def is_serialized(self):
100106
Encoding.LARGE_WKB,
101107
Encoding.WKT,
102108
Encoding.LARGE_WKT,
109+
Encoding.WKB_VIEW,
110+
Encoding.WKT_VIEW,
103111
)
104112

105113

@@ -260,10 +268,14 @@ class EdgeType(TypeSpecEnum):
260268
(Encoding.WKB, Encoding.LARGE_WKB): Encoding.LARGE_WKB,
261269
(Encoding.WKB, Encoding.WKT): Encoding.WKB,
262270
(Encoding.WKB, Encoding.LARGE_WKT): Encoding.LARGE_WKB,
271+
(Encoding.WKB, Encoding.WKB_VIEW): Encoding.WKB_VIEW,
263272
(Encoding.WKB, Encoding.GEOARROW): Encoding.WKB,
273+
(Encoding.WKB_VIEW, Encoding.LARGE_WKB): Encoding.WKB_VIEW,
264274
(Encoding.WKT, Encoding.LARGE_WKT): Encoding.LARGE_WKT,
265275
(Encoding.WKT, Encoding.LARGE_WKB): Encoding.LARGE_WKB,
276+
(Encoding.WKT, Encoding.WKT_VIEW): Encoding.WKT_VIEW,
266277
(Encoding.WKT, Encoding.GEOARROW): Encoding.WKB,
278+
(Encoding.WKT_VIEW, Encoding.LARGE_WKT): Encoding.WKT_VIEW,
267279
(GeometryType.POINT, GeometryType.MULTIPOINT): GeometryType.MULTIPOINT,
268280
(
269281
GeometryType.LINESTRING,

geoarrow-types/src/geoarrow/types/type_pyarrow.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -558,6 +558,10 @@ def _parse_storage(storage_type):
558558
return [("string", ())]
559559
elif pa_types.is_large_string(storage_type):
560560
return [("large_string", ())]
561+
elif hasattr(pa_types, "is_binary_view") and pa_types.is_binary_view(storage_type):
562+
return [("binary_view", ())]
563+
elif hasattr(pa_types, "is_string_view") and pa_types.is_string_view(storage_type):
564+
return [("string_view", ())]
561565
elif pa_types.is_float64(storage_type):
562566
return [("double", ())]
563567
elif isinstance(storage_type, pa.ListType):
@@ -1014,6 +1018,10 @@ def _spec_short_repr(spec, ext_name):
10141018
Encoding.LARGE_WKB: pa.large_binary(),
10151019
}
10161020

1021+
if hasattr(pa, "binary_view"):
1022+
_SERIALIZED_STORAGE_TYPES[Encoding.WKT_VIEW] = pa.string_view()
1023+
_SERIALIZED_STORAGE_TYPES[Encoding.WKB_VIEW] = pa.binary_view()
1024+
10171025
_NATIVE_STORAGE_TYPES = _generate_storage_types()
10181026
_add_union_types_to_native_storage_types()
10191027

@@ -1022,6 +1030,8 @@ def _spec_short_repr(spec, ext_name):
10221030
("large_binary",): Encoding.LARGE_WKB,
10231031
("string",): Encoding.WKT,
10241032
("large_string",): Encoding.LARGE_WKT,
1033+
("binary_view",): Encoding.WKB_VIEW,
1034+
("string_view",): Encoding.WKT_VIEW,
10251035
("struct",): TypeSpec(
10261036
encoding=Encoding.GEOARROW,
10271037
geometry_type=GeometryType.POINT,

geoarrow-types/src/geoarrow/types/type_spec.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -357,12 +357,22 @@ def wkb(*, edge_type=None, crs=crs.UNSPECIFIED) -> TypeSpec:
357357
def large_wkb(*, edge_type=None, crs=crs.UNSPECIFIED) -> TypeSpec:
358358
"""Large well-known binary encoding
359359
360-
Create a :class:`TypeSpec` denoting a well-known binary type with
360+
Create a :class:`TypeSpec` denoting a well-known binary type with
361361
64-bit data offsets. See :func:`type_spec` for parameter definitions.
362362
"""
363363
return type_spec(encoding=Encoding.LARGE_WKB, edge_type=edge_type, crs=crs)
364364

365365

366+
def wkb_view(*, edge_type=None, crs=crs.UNSPECIFIED) -> TypeSpec:
367+
"""Well-known binary view encoding
368+
369+
Create a :class:`TypeSpec` denoting a well-known binary type using
370+
binary views as the underlying storage type. See :func:`type_spec`
371+
for parameter definitions.
372+
"""
373+
return type_spec(encoding=Encoding.WKB_VIEW, edge_type=edge_type, crs=crs)
374+
375+
366376
def wkt(*, edge_type=None, crs=crs.UNSPECIFIED) -> TypeSpec:
367377
"""Well-known text encoding
368378
@@ -381,6 +391,16 @@ def large_wkt(*, edge_type=None, crs=crs.UNSPECIFIED) -> TypeSpec:
381391
return type_spec(encoding=Encoding.LARGE_WKT, edge_type=edge_type, crs=crs)
382392

383393

394+
def wkt_view(*, edge_type=None, crs=crs.UNSPECIFIED) -> TypeSpec:
395+
"""Well-known text view encoding
396+
397+
Create a :class:`TypeSpec` denoting a well-known text type using
398+
string views as the underlying storage type. See :func:`type_spec`
399+
for parameter definitions.
400+
"""
401+
return type_spec(encoding=Encoding.WKT_VIEW, edge_type=edge_type, crs=crs)
402+
403+
384404
def geoarrow(
385405
*,
386406
geometry_type=None,
@@ -619,6 +639,8 @@ def type_spec(
619639
Encoding.LARGE_WKB: "geoarrow.wkb",
620640
Encoding.WKT: "geoarrow.wkt",
621641
Encoding.LARGE_WKT: "geoarrow.wkt",
642+
Encoding.WKB_VIEW: "geoarrow.wkb",
643+
Encoding.WKT_VIEW: "geoarrow.wkt",
622644
}
623645

624646
_GEOARROW_EXT_NAMES = {

geoarrow-types/tests/test_type_pyarrow.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,8 @@ def test_multipolygon_array_from_geobuffers():
441441
gt.large_wkt(),
442442
gt.wkb(),
443443
gt.large_wkb(),
444+
gt.wkt_view(),
445+
gt.wkb_view(),
444446
# Geometry types
445447
gt.box(),
446448
gt.point(),
@@ -470,6 +472,12 @@ def test_multipolygon_array_from_geobuffers():
470472
],
471473
)
472474
def test_roundtrip_extension_type(spec):
475+
if not hasattr(pa, "binary_view") and spec.encoding in (
476+
gt.Encoding.WKB_VIEW,
477+
gt.Encoding.WKT_VIEW,
478+
):
479+
pytest.skip("binary_view/string_view requires pyarrow >= 14")
480+
473481
extension_type = type_pyarrow.extension_type(spec)
474482
serialized = extension_type.__arrow_ext_serialize__()
475483
extension_type2 = type_pyarrow._deserialize_storage(

0 commit comments

Comments
 (0)