This repository was archived by the owner on Dec 1, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathbackends.py
More file actions
46 lines (35 loc) · 1.72 KB
/
backends.py
File metadata and controls
46 lines (35 loc) · 1.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# Python 3.9 doesn't support "|" in type annotations unless annotations are postponed via this import
from __future__ import annotations
import nested_pandas as npd
import pandas as pd
from dask._dispatch import get_collection_type
from dask.dataframe.backends import meta_nonempty_dataframe
from dask.dataframe.dispatch import make_meta_dispatch
from dask.dataframe.extensions import make_array_nonempty
from dask.dataframe.utils import meta_nonempty
from nested_pandas.series.ext_array import NestedExtensionArray
from .core import NestedFrame
@get_collection_type.register(npd.NestedFrame)
def _nestedframe_collection_type(_):
    """Return the Dask collection class to use for a ``npd.NestedFrame`` meta object."""
    return NestedFrame
# The following dispatch functions are defined as per the Dask extension guide:
# https://docs.dask.org/en/latest/dataframe-extend.html
@make_meta_dispatch.register(npd.NestedFrame)
def make_meta_frame(x, index=None) -> npd.NestedFrame:
    """Build a zero-row NestedFrame that Dask can use as a meta object.

    Parameters
    ----------
    x : npd.NestedFrame
        Frame whose column names and dtypes are copied into the result.
    index : optional
        Index to derive the (empty) result index from. When ``None``,
        ``x.index`` is used instead.

    Returns
    -------
    npd.NestedFrame
        A frame with one empty column per entry in ``x.dtypes`` and a
        zero-length copy of the chosen index.
    """
    column_dtypes = x.dtypes.to_dict()

    # Only the index type matters for meta, so keep a zero-length copy of it.
    source_index = x.index if index is None else index
    empty_index = source_index[:0].copy()

    # One empty Series per column, each carrying the column's dtype.
    empty_columns = {}
    for name, dtype in column_dtypes.items():
        empty_columns[name] = pd.Series(dtype=dtype)

    return npd.NestedFrame(empty_columns, index=empty_index)
@meta_nonempty.register(npd.NestedFrame)
def _nonempty_nestedframe(x, index=None) -> npd.NestedFrame:
    """Produce a non-empty NestedFrame counterpart of the meta frame ``x``.

    The frame is generated by Dask's ``meta_nonempty_dataframe`` and then
    wrapped in ``npd.NestedFrame``. When ``index`` is given, it replaces the
    index of the generated frame before wrapping.
    """
    nonempty = meta_nonempty_dataframe(x)
    if index is None:
        return npd.NestedFrame(nonempty)

    # Caller supplied an explicit index: use it in place of the generated one.
    nonempty.index = index
    return npd.NestedFrame(nonempty)
@make_array_nonempty.register(npd.NestedDtype)
def _(dtype) -> NestedExtensionArray:
    """Return a two-element, all-NA NestedExtensionArray of the given dtype.

    Registered with Dask's ``make_array_nonempty`` dispatch for
    ``npd.NestedDtype``.
    """
    # Two placeholder values are needed to avoid a length error in meta
    # inference; Dask seems to explicitly require meta dtypes to have length 2.
    placeholders = [pd.NA] * 2
    return NestedExtensionArray._from_sequence(placeholders, dtype=dtype)