Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions vertexai/_genai/_datasets_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,61 @@ def create_from_response(model_type: Type[T], response: dict[str, Any]) -> T:
return model_type(**filtered_response)


def multimodal_dataset_get_bigquery_uri(
    multimodal_dataset: common.MultimodalDataset,
) -> str:
    """Returns the BigQuery URI stored on a multimodal dataset.

    Walks `metadata` -> `input_config` -> `bigquery_source` -> `uri` on the
    given dataset, failing at the first level that is absent or `None`.

    Args:
      multimodal_dataset: The dataset whose BigQuery source URI is read.

    Returns:
      The URI found at `metadata.input_config.bigquery_source.uri`, passed
      through `str()`.

    Raises:
      ValueError: If any level of the attribute chain is missing or `None`.
        The message names the first level that could not be resolved.
    """
    # Each step pairs the attribute to descend into with the error raised when
    # that attribute is absent or None; order matters, outermost level first.
    required_path = (
        ("metadata", "Multimodal dataset metadata is required."),
        ("input_config", "Multimodal dataset input config is required."),
        (
            "bigquery_source",
            "Multimodal dataset input config bigquery source is required.",
        ),
        (
            "uri",
            "Multimodal dataset input config bigquery source uri is required.",
        ),
    )

    current = multimodal_dataset
    for attribute, error_message in required_path:
        # A default of None folds "attribute missing" and "attribute is None"
        # into a single check.
        current = getattr(current, attribute, None)
        if current is None:
            raise ValueError(error_message)
    return str(current)


def multimodal_dataset_set_bigquery_uri(
    multimodal_dataset: common.MultimodalDataset,
    bigquery_uri: str,
) -> None:
    """Sets the BigQuery URI on a multimodal dataset, in place.

    Any level of `metadata.input_config.bigquery_source` that is currently
    `None` is replaced with a freshly constructed object; levels that are
    already populated are reused, so their other fields are left as they are.

    Args:
      multimodal_dataset: The dataset to modify.
      bigquery_uri: The value to store at
        `metadata.input_config.bigquery_source.uri`. It is stored as given;
        no prefixing or validation happens here.
    """
    meta = multimodal_dataset.metadata
    if meta is None:
        meta = common.SchemaTablesDatasetMetadata()

    config = meta.input_config
    if config is None:
        config = common.SchemaTablesDatasetMetadataInputConfig()

    source = config.bigquery_source
    if source is None:
        source = common.SchemaTablesDatasetMetadataBigQuerySource()

    # Write the leaf first, then re-attach each level to its parent so that
    # any newly created objects end up linked into the dataset.
    source.uri = bigquery_uri
    config.bigquery_source = source
    meta.input_config = config
    multimodal_dataset.metadata = meta


def _try_import_bigframes() -> Any:
"""Tries to import `bigframes`."""
try:
Expand Down
136 changes: 20 additions & 116 deletions vertexai/_genai/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -790,35 +790,11 @@ def create_from_bigquery(
"""
if isinstance(multimodal_dataset, dict):
multimodal_dataset = types.MultimodalDataset(**multimodal_dataset)
if (
not hasattr(multimodal_dataset, "metadata")
or multimodal_dataset.metadata is None
):
raise ValueError("Multimodal dataset metadata is required.")
if (
not hasattr(multimodal_dataset.metadata, "input_config")
or multimodal_dataset.metadata.input_config is None
):
raise ValueError("Multimodal dataset input config is required.")
if (
not hasattr(multimodal_dataset.metadata.input_config, "bigquery_source")
or multimodal_dataset.metadata.input_config.bigquery_source is None
):
raise ValueError(
"Multimodal dataset input config bigquery source is required."
)
if (
not hasattr(multimodal_dataset.metadata.input_config.bigquery_source, "uri")
or multimodal_dataset.metadata.input_config.bigquery_source.uri is None
):
raise ValueError(
"Multimodal dataset input config bigquery source uri is required."
)
if not multimodal_dataset.metadata.input_config.bigquery_source.uri.startswith(
"bq://"
):
multimodal_dataset.metadata.input_config.bigquery_source.uri = (
f"bq://{multimodal_dataset.metadata.input_config.bigquery_source.uri}"

uri = _datasets_utils.multimodal_dataset_get_bigquery_uri(multimodal_dataset)
if not uri.startswith("bq://"):
_datasets_utils.multimodal_dataset_set_bigquery_uri(
multimodal_dataset, f"bq://{uri}"
)
if isinstance(config, dict):
config = types.CreateMultimodalDatasetConfig(**config)
Expand Down Expand Up @@ -948,35 +924,11 @@ def update_multimodal_dataset(
"""
if isinstance(multimodal_dataset, dict):
multimodal_dataset = types.MultimodalDataset(**multimodal_dataset)
if (
not hasattr(multimodal_dataset, "metadata")
or multimodal_dataset.metadata is None
):
raise ValueError("Multimodal dataset metadata is required.")
if (
not hasattr(multimodal_dataset.metadata, "input_config")
or multimodal_dataset.metadata.input_config is None
):
raise ValueError("Multimodal dataset input config is required.")
if (
not hasattr(multimodal_dataset.metadata.input_config, "bigquery_source")
or multimodal_dataset.metadata.input_config.bigquery_source is None
):
raise ValueError(
"Multimodal dataset input config bigquery source is required."
)
if (
not hasattr(multimodal_dataset.metadata.input_config.bigquery_source, "uri")
or multimodal_dataset.metadata.input_config.bigquery_source.uri is None
):
raise ValueError(
"Multimodal dataset input config bigquery source uri is required."
)
if not multimodal_dataset.metadata.input_config.bigquery_source.uri.startswith(
"bq://"
):
multimodal_dataset.metadata.input_config.bigquery_source.uri = (
f"bq://{multimodal_dataset.metadata.input_config.bigquery_source.uri}"

uri = _datasets_utils.multimodal_dataset_get_bigquery_uri(multimodal_dataset)
if not uri.startswith("bq://"):
_datasets_utils.multimodal_dataset_set_bigquery_uri(
multimodal_dataset, f"bq://{uri}"
)
if isinstance(config, dict):
config = types.CreateMultimodalDatasetConfig(**config)
Expand Down Expand Up @@ -1887,35 +1839,11 @@ async def create_from_bigquery(
"""
if isinstance(multimodal_dataset, dict):
multimodal_dataset = types.MultimodalDataset(**multimodal_dataset)
if (
not hasattr(multimodal_dataset, "metadata")
or multimodal_dataset.metadata is None
):
raise ValueError("Multimodal dataset metadata is required.")
if (
not hasattr(multimodal_dataset.metadata, "input_config")
or multimodal_dataset.metadata.input_config is None
):
raise ValueError("Multimodal dataset input config is required.")
if (
not hasattr(multimodal_dataset.metadata.input_config, "bigquery_source")
or multimodal_dataset.metadata.input_config.bigquery_source is None
):
raise ValueError(
"Multimodal dataset input config bigquery source is required."
)
if (
not hasattr(multimodal_dataset.metadata.input_config.bigquery_source, "uri")
or multimodal_dataset.metadata.input_config.bigquery_source.uri is None
):
raise ValueError(
"Multimodal dataset input config bigquery source uri is required."
)
if not multimodal_dataset.metadata.input_config.bigquery_source.uri.startswith(
"bq://"
):
multimodal_dataset.metadata.input_config.bigquery_source.uri = (
f"bq://{multimodal_dataset.metadata.input_config.bigquery_source.uri}"

uri = _datasets_utils.multimodal_dataset_get_bigquery_uri(multimodal_dataset)
if not uri.startswith("bq://"):
_datasets_utils.multimodal_dataset_set_bigquery_uri(
multimodal_dataset, f"bq://{uri}"
)
if isinstance(config, dict):
config = types.CreateMultimodalDatasetConfig(**config)
Expand Down Expand Up @@ -2041,35 +1969,11 @@ async def update_multimodal_dataset(
"""
if isinstance(multimodal_dataset, dict):
multimodal_dataset = types.MultimodalDataset(**multimodal_dataset)
if (
not hasattr(multimodal_dataset, "metadata")
or multimodal_dataset.metadata is None
):
raise ValueError("Multimodal dataset metadata is required.")
if (
not hasattr(multimodal_dataset.metadata, "input_config")
or multimodal_dataset.metadata.input_config is None
):
raise ValueError("Multimodal dataset input config is required.")
if (
not hasattr(multimodal_dataset.metadata.input_config, "bigquery_source")
or multimodal_dataset.metadata.input_config.bigquery_source is None
):
raise ValueError(
"Multimodal dataset input config bigquery source is required."
)
if (
not hasattr(multimodal_dataset.metadata.input_config.bigquery_source, "uri")
or multimodal_dataset.metadata.input_config.bigquery_source.uri is None
):
raise ValueError(
"Multimodal dataset input config bigquery source uri is required."
)
if not multimodal_dataset.metadata.input_config.bigquery_source.uri.startswith(
"bq://"
):
multimodal_dataset.metadata.input_config.bigquery_source.uri = (
f"bq://{multimodal_dataset.metadata.input_config.bigquery_source.uri}"

uri = _datasets_utils.multimodal_dataset_get_bigquery_uri(multimodal_dataset)
if not uri.startswith("bq://"):
_datasets_utils.multimodal_dataset_set_bigquery_uri(
multimodal_dataset, f"bq://{uri}"
)
if isinstance(config, dict):
config = types.CreateMultimodalDatasetConfig(**config)
Expand Down
Loading