Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -4301,6 +4301,32 @@ def to_gbq(
result_table = result.query_job.destination
assert result_table is not None

obj_ref_dest_cols = []
for col_id in id_overrides.keys():
try:
if (
export_array.get_column_type(col_id)
== bigframes.dtypes.OBJ_REF_DTYPE
):
obj_ref_dest_cols.append(id_overrides[col_id])
except Exception:
pass

if obj_ref_dest_cols:
table = self._session.bqclient.get_table(result_table)
new_schema = []
for field in table.schema:
if field.name in obj_ref_dest_cols:
field_dict = field.to_api_repr()
field_dict["description"] = "bigframes_dtype: OBJ_REF_DTYPE"
new_schema.append(
google.cloud.bigquery.SchemaField.from_api_repr(field_dict)
)
else:
new_schema.append(field)
table.schema = new_schema
self._session.bqclient.update_table(table, ["schema"])

if temp_table_ref:
bigframes.session._io.bigquery.set_table_expiration(
self._session.bqclient,
Expand Down
7 changes: 7 additions & 0 deletions bigframes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -772,6 +772,13 @@ def convert_schema_field(
) -> typing.Tuple[str, Dtype]:
is_repeated = field.mode == "REPEATED"
if field.field_type == "RECORD":
if field.description == "bigframes_dtype: OBJ_REF_DTYPE":
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIUC, we can changes both convert_to_schema_field and convert_schema_field to achieve round-trip persistence without changes in dataframe.py.
Please check how TIMEDELTA_DESCRIPTION_TAG works.

bf_dtype = OBJ_REF_DTYPE # type: ignore
if is_repeated:
pa_type = pa.list_(bigframes_dtype_to_arrow_dtype(bf_dtype))
bf_dtype = pd.ArrowDtype(pa_type)
return field.name, bf_dtype

mapped_fields = map(convert_schema_field, field.fields)
fields = []
for name, dtype in mapped_fields:
Expand Down
25 changes: 25 additions & 0 deletions tests/system/small/test_dataframe_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -1002,6 +1002,31 @@ def test_to_gbq_timedelta_tag_ignored_when_appending(bigquery_client, dataset_id
assert table.schema[0].description is None


def test_to_gbq_obj_ref(session, dataset_id: str, bigquery_client):
destination_table = f"{dataset_id}.test_to_gbq_obj_ref"
sql = """
SELECT
'gs://cloud-samples-data/vision/ocr/sign.jpg' AS uri_col
"""
df = session.read_gbq(sql)
df["obj_ref_col"] = df["uri_col"].str.to_blob()
df = df.drop(columns=["uri_col"])

# Save the dataframe to bigquery
df.to_gbq(destination_table)

# Verify the table schema description is added
table = bigquery_client.get_table(destination_table)
obj_ref_field = next(f for f in table.schema if f.name == "obj_ref_col")
assert obj_ref_field.field_type == "RECORD"
assert obj_ref_field.description == "bigframes_dtype: OBJ_REF_DTYPE"

# Verify reloading it correctly restores the dtype
reloaded_df = session.read_gbq(destination_table)
assert reloaded_df["obj_ref_col"].dtype == dtypes.OBJ_REF_DTYPE
assert len(reloaded_df) == 1


@pytest.mark.parametrize(
("index"),
[True, False],
Expand Down
Loading