From 7a0272b97d2ede89bc1806f675360e4dff20b3ec Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Thu, 15 Jan 2026 19:28:10 +0000 Subject: [PATCH 1/2] chore: define type logging bit masks --- bigframes/core/logging/__init__.py | 4 +- bigframes/core/logging/data_types.py | 66 +++++++++++++++++++++ tests/unit/core/logging/test_data_types.py | 69 ++++++++++++++++++++++ 3 files changed, 137 insertions(+), 2 deletions(-) create mode 100644 bigframes/core/logging/data_types.py create mode 100644 tests/unit/core/logging/test_data_types.py diff --git a/bigframes/core/logging/__init__.py b/bigframes/core/logging/__init__.py index 95c077a99a..5d06124efc 100644 --- a/bigframes/core/logging/__init__.py +++ b/bigframes/core/logging/__init__.py @@ -12,6 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from bigframes.core.logging import log_adapter +from bigframes.core.logging import data_types, log_adapter -__all__ = ["log_adapter"] +__all__ = ["log_adapter", "data_types"] diff --git a/bigframes/core/logging/data_types.py b/bigframes/core/logging/data_types.py new file mode 100644 index 0000000000..4bf45a8b65 --- /dev/null +++ b/bigframes/core/logging/data_types.py @@ -0,0 +1,66 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from bigframes import dtypes + + +def _add_data_type(existing_types: int, curr_type: dtypes.Dtype) -> int: + return existing_types | _get_dtype_mask(curr_type) + + +def _get_dtype_mask(dtype: dtypes.Dtype) -> int: + match dtype: + case dtypes.INT_DTYPE: + return 1 << 1 + case dtypes.FLOAT_DTYPE: + return 1 << 2 + case dtypes.BOOL_DTYPE: + return 1 << 3 + case dtypes.STRING_DTYPE: + return 1 << 4 + case dtypes.BYTES_DTYPE: + return 1 << 5 + case dtypes.DATE_DTYPE: + return 1 << 6 + case dtypes.TIME_DTYPE: + return 1 << 7 + case dtypes.DATETIME_DTYPE: + return 1 << 8 + case dtypes.TIMESTAMP_DTYPE: + return 1 << 9 + case dtypes.TIMEDELTA_DTYPE: + return 1 << 10 + case dtypes.NUMERIC_DTYPE: + return 1 << 11 + case dtypes.BIGNUMERIC_DTYPE: + return 1 << 12 + case dtypes.GEO_DTYPE: + return 1 << 13 + case dtypes.JSON_DTYPE: + return 1 << 14 + + if dtypes.is_struct_like(dtype): + mask = 1 << 15 + if dtype == dtypes.OBJ_REF_DTYPE: + # obj_ref is a special struct type for multi-modal data. + # It should be double counted as both "struct" and its own type. + mask = mask | (1 << 17) + return mask + + if dtypes.is_array_like(dtype): + return 1 << 16 + + # If an unknown datat type is present, mark it with the least significant bit. + return 1 << 0 diff --git a/tests/unit/core/logging/test_data_types.py b/tests/unit/core/logging/test_data_types.py new file mode 100644 index 0000000000..9e3d1f1ed0 --- /dev/null +++ b/tests/unit/core/logging/test_data_types.py @@ -0,0 +1,69 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pandas as pd +import pyarrow as pa +import pytest + +from bigframes import dtypes +from bigframes.core.logging import data_types + +UNKNOWN_TYPE = pd.ArrowDtype(pa.time64("ns")) + +PA_STRUCT_TYPE = pa.struct([("city", pa.string()), ("pop", pa.int64())]) + +PA_LIST_TYPE = pa.list_(pa.int64()) + + +@pytest.mark.parametrize( + ("dtype", "expected_mask"), + [ + (UNKNOWN_TYPE, 1 << 0), + (dtypes.INT_DTYPE, 1 << 1), + (dtypes.FLOAT_DTYPE, 1 << 2), + (dtypes.BOOL_DTYPE, 1 << 3), + (dtypes.STRING_DTYPE, 1 << 4), + (dtypes.BYTES_DTYPE, 1 << 5), + (dtypes.DATE_DTYPE, 1 << 6), + (dtypes.TIME_DTYPE, 1 << 7), + (dtypes.DATETIME_DTYPE, 1 << 8), + (dtypes.TIMESTAMP_DTYPE, 1 << 9), + (dtypes.TIMEDELTA_DTYPE, 1 << 10), + (dtypes.NUMERIC_DTYPE, 1 << 11), + (dtypes.BIGNUMERIC_DTYPE, 1 << 12), + (dtypes.GEO_DTYPE, 1 << 13), + (dtypes.JSON_DTYPE, 1 << 14), + (pd.ArrowDtype(PA_STRUCT_TYPE), 1 << 15), + (pd.ArrowDtype(PA_LIST_TYPE), 1 << 16), + (dtypes.OBJ_REF_DTYPE, (1 << 15) | (1 << 17)), + ], +) +def test_get_dtype_mask(dtype, expected_mask): + assert data_types._get_dtype_mask(dtype) == expected_mask + + +def test_add_data_type__type_overlap_no_op(): + curr_type = dtypes.STRING_DTYPE + existing_types = data_types._get_dtype_mask(curr_type) + + assert data_types._add_data_type(existing_types, curr_type) == existing_types + + +def test_add_data_type__new_type_updated(): + curr_type = dtypes.STRING_DTYPE + existing_types = data_types._get_dtype_mask(dtypes.INT_DTYPE) + + assert data_types._add_data_type( + existing_types, curr_type + ) == existing_types | data_types._get_dtype_mask(curr_type) From 328d04820a2dd98650164f2d1979a57387a136c2 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Thu, 15 Jan 2026 19:36:19 +0000 Subject: [PATCH 2/2] make code compatible with py 3.9 --- bigframes/core/logging/data_types.py | 57 ++++++++++++++-------------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/bigframes/core/logging/data_types.py b/bigframes/core/logging/data_types.py index 4bf45a8b65..db99b1a020 100644 --- a/bigframes/core/logging/data_types.py +++ b/bigframes/core/logging/data_types.py @@ -21,35 +21,34 @@ def _add_data_type(existing_types: int, curr_type: dtypes.Dtype) -> int: def _get_dtype_mask(dtype: dtypes.Dtype) -> int: - match dtype: - case dtypes.INT_DTYPE: - return 1 << 1 - case dtypes.FLOAT_DTYPE: - return 1 << 2 - case dtypes.BOOL_DTYPE: - return 1 << 3 - case dtypes.STRING_DTYPE: - return 1 << 4 - case dtypes.BYTES_DTYPE: - return 1 << 5 - case dtypes.DATE_DTYPE: - return 1 << 6 - case dtypes.TIME_DTYPE: - return 1 << 7 - case dtypes.DATETIME_DTYPE: - return 1 << 8 - case dtypes.TIMESTAMP_DTYPE: - return 1 << 9 - case dtypes.TIMEDELTA_DTYPE: - return 1 << 10 - case dtypes.NUMERIC_DTYPE: - return 1 << 11 - case dtypes.BIGNUMERIC_DTYPE: - return 1 << 12 - case dtypes.GEO_DTYPE: - return 1 << 13 - case dtypes.JSON_DTYPE: - return 1 << 14 + if dtype == dtypes.INT_DTYPE: + return 1 << 1 + if dtype == dtypes.FLOAT_DTYPE: + return 1 << 2 + if dtype == dtypes.BOOL_DTYPE: + return 1 << 3 + if dtype == dtypes.STRING_DTYPE: + return 1 << 4 + if dtype == dtypes.BYTES_DTYPE: + return 1 << 5 + if dtype == dtypes.DATE_DTYPE: + return 1 << 6 + if dtype == dtypes.TIME_DTYPE: + return 1 << 7 + if dtype == dtypes.DATETIME_DTYPE: + return 1 << 8 + if dtype == dtypes.TIMESTAMP_DTYPE: + return 1 << 9 + if dtype == dtypes.TIMEDELTA_DTYPE: + return 1 << 10 + if dtype == dtypes.NUMERIC_DTYPE: + return 1 << 11 + if dtype == dtypes.BIGNUMERIC_DTYPE: + return 1 << 12 + if dtype == dtypes.GEO_DTYPE: + return 1 << 13 + if dtype == dtypes.JSON_DTYPE: + return 1 << 14 if dtypes.is_struct_like(dtype): mask = 1 << 15