From 88b4a5687691468df14b0c9d30e4dc0532d9de8b Mon Sep 17 00:00:00 2001 From: "hotdata-automation[bot]" <267177015+hotdata-automation[bot]@users.noreply.github.com> Date: Wed, 17 Jun 2026 21:41:59 +0000 Subject: [PATCH] chore: make api doc language end-user focused --- CHANGELOG.md | 6 ++ docs/ConnectionsApi.md | 10 +-- docs/CreateConnectionRequest.md | 1 - docs/CreateDatabaseRequest.md | 3 +- docs/CreateDatasetRequest.md | 1 - docs/DatabaseDefaultSchemaDecl.md | 2 +- docs/DatabaseDefaultTableDecl.md | 2 +- docs/DatabasesApi.md | 6 +- docs/IndexesApi.md | 24 +++---- docs/JobType.md | 2 + docs/LoadManagedTableRequest.md | 2 +- docs/LoadManagedTableResponse.md | 2 +- docs/ManagedSchemaResponse.md | 2 +- docs/QueryApi.md | 1 + docs/QueryResponse.md | 12 ++-- docs/QueryRunInfo.md | 4 +- docs/ResultsApi.md | 10 +-- docs/ResultsFormatQuery.md | 2 +- hotdata/api/connections_api.py | 24 +++---- hotdata/api/databases_api.py | 18 ++--- hotdata/api/indexes_api.py | 72 +++++++++---------- hotdata/api/query_api.py | 3 + hotdata/api/results_api.py | 18 ++--- hotdata/api_client.py | 2 +- hotdata/models/create_connection_request.py | 11 +-- hotdata/models/create_database_request.py | 13 +--- hotdata/models/create_dataset_request.py | 9 +-- .../models/database_default_schema_decl.py | 2 +- hotdata/models/database_default_table_decl.py | 2 +- hotdata/models/get_result_response.py | 5 +- hotdata/models/job_type.py | 1 + hotdata/models/load_managed_table_request.py | 2 +- hotdata/models/load_managed_table_response.py | 2 +- hotdata/models/managed_schema_response.py | 2 +- hotdata/models/query_response.py | 12 ++-- hotdata/models/query_run_info.py | 18 ++++- hotdata/models/results_format_query.py | 2 +- pyproject.toml | 2 +- test/test_create_connection_request.py | 3 +- test/test_create_database_request.py | 3 +- test/test_create_dataset_request.py | 1 - test/test_get_result_response.py | 2 +- test/test_list_query_runs_response.py | 4 ++ test/test_query_run_info.py | 2 + 44 files 
changed, 167 insertions(+), 160 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 36e2c7f..959656a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.4.1] - 2026-06-17 + +### Changed + +- chore: make api doc language end-user focused + ## [0.4.0] - 2026-06-16 ### Added diff --git a/docs/ConnectionsApi.md b/docs/ConnectionsApi.md index 75c2d18..c43e846 100644 --- a/docs/ConnectionsApi.md +++ b/docs/ConnectionsApi.md @@ -23,7 +23,7 @@ Method | HTTP request | Description Add managed schema -Declare a new schema (and optionally its tables) on an existing managed catalog after creation. The schema is added to the connection's declaration; declared tables can then be populated via the managed-table load endpoint. Only valid against connections whose source type is `managed`. Identifiers are normalised to lowercase. +Declare a new schema (and optionally its tables) on an existing managed catalog after creation. The schema is added to the connection's declaration; declared tables can then be populated via the managed-table load endpoint. Only valid against connections whose source type is `managed`. Identifiers are normalized to lowercase. ### Example @@ -114,7 +114,7 @@ Name | Type | Description | Notes Add managed table -Declare a new table on an existing schema of a managed catalog after creation. The table is added empty (declared-but-unloaded) and can be populated via the managed-table load endpoint. Only valid against connections whose source type is `managed`. Identifiers are normalised to lowercase. +Declare a new table on an existing schema of a managed catalog after creation. The table is added empty (declared-but-unloaded) and can be populated via the managed-table load endpoint. Only valid against connections whose source type is `managed`. Identifiers are normalized to lowercase. 
### Example @@ -465,7 +465,7 @@ void (empty response body) Delete managed table -Delete a single managed-catalog table. The catalog row is removed and the backing parquet file (if any) is scheduled for deletion. Only valid against connections whose source type is `managed`. +Delete a single managed-catalog table. The table and its data are removed. Only valid against connections whose source type is `managed`. ### Example @@ -810,7 +810,7 @@ This endpoint does not need any parameter. Load managed table from upload -Publish a previously-uploaded parquet file as the new generation of a managed table. The upload must reference a parquet file (verified by magic bytes). Only `mode = "replace"` is supported. Concurrent loads against the same upload return 409. +Publish a previously-uploaded parquet file as the new contents of a managed table. The upload must reference a parquet file. Only `mode = "replace"` is supported. Concurrent loads against the same upload return 409. ### Example @@ -979,7 +979,7 @@ void (empty response body) | Status code | Description | Response headers | |-------------|-------------|------------------| **204** | Cache purged | - | -**400** | Managed catalogs own their parquet generations and cannot be purged | - | +**400** | Managed catalogs own their data and cannot be cache-purged | - | **404** | Connection not found | - | **409** | Connection backs a database's default catalog and cannot be purged directly | - | diff --git a/docs/CreateConnectionRequest.md b/docs/CreateConnectionRequest.md index f55d1ff..7fbf6e0 100644 --- a/docs/CreateConnectionRequest.md +++ b/docs/CreateConnectionRequest.md @@ -12,7 +12,6 @@ Name | Type | Description | Notes **secret_name** | **str** | Optional reference to a secret by name. If provided, this secret will be used for authentication. Mutually exclusive with `secret_id`. | [optional] **skip_discovery** | **bool** | If true, skip automatic schema discovery after registering the connection. 
The connection will be created but no tables will be discovered. You can run discovery later via the refresh endpoint. | [optional] **source_type** | **str** | | -**storage_backend** | **str** | Physical storage backend for tables created under this connection. `\"parquet\"` (default) uses the versioned parquet cache. `\"ducklake\"` stores data in a DuckLake catalog in the shared metadata DB configured via `ducklake.metadata_pg_url`; accepted for any source type and requires that pool to be configured. | [optional] ## Example diff --git a/docs/CreateDatabaseRequest.md b/docs/CreateDatabaseRequest.md index 4afafec..ed6d218 100644 --- a/docs/CreateDatabaseRequest.md +++ b/docs/CreateDatabaseRequest.md @@ -9,8 +9,7 @@ Name | Type | Description | Notes **default_catalog** | **str** | Optional name the database's auto-created default catalog answers to inside its query scope. Must be a valid SQL identifier (`[a-z0-9_]`, not starting with a digit) and may not collide with the system catalogs `hotdata`, `datasets`, or `information_schema`. Defaults to `default` when omitted, so `default.main.<table>` keeps working. | [optional] **expires_at** | **str** | When this database expires. Accepts either an RFC 3339 timestamp (e.g. `\"2026-06-01T00:00:00Z\"`) or a relative duration suffixed with `h` (hours), `m` (minutes), or `d` (days) — for example `\"24h\"`, `\"48h\"`, or `\"7d\"`. Omitted (or empty) means the database never expires. Expiry is best-effort: the database will not be deleted before `expires_at`, but cleanup may run later than the exact timestamp. | [optional] **name** | **str** | Optional free-form display label (for UIs/CLIs). Not unique. Not an identifier — databases are always addressed by `id`. Accepts the legacy `description` key as an alias so clients that predate the rename keep populating this field. 
| [optional] -**schemas** | [**List[DatabaseDefaultSchemaDecl]**](DatabaseDefaultSchemaDecl.md) | Optional schemas/tables to declare on the database's auto-created default catalog. Mirrors the `config.schemas` field of a managed `POST /v1/connections`. Tables declared here can be loaded via the standard managed-table load endpoint targeting `default_connection_id`. Omitted or empty means the default catalog starts empty. | [optional] -**storage_backend** | **str** | Physical storage backend for the database's auto-created `default` catalog. `\"parquet\"` (default) uses the versioned parquet cache. `\"ducklake\"` stores data in a DuckLake catalog in the shared metadata DB configured via `ducklake.metadata_pg_url`, which must be configured for that value to be accepted. Omitted means `\"parquet\"`. | [optional] +**schemas** | [**List[DatabaseDefaultSchemaDecl]**](DatabaseDefaultSchemaDecl.md) | Optional schemas/tables to declare on the database's auto-created default catalog. Tables declared here can be loaded via the standard managed-table load endpoint targeting `default_connection_id`. Omitted or empty means the default catalog starts empty. | [optional] ## Example diff --git a/docs/CreateDatasetRequest.md b/docs/CreateDatasetRequest.md index db73987..72e78e3 100644 --- a/docs/CreateDatasetRequest.md +++ b/docs/CreateDatasetRequest.md @@ -8,7 +8,6 @@ Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- **label** | **str** | | **source** | [**DatasetSource**](DatasetSource.md) | | -**storage_backend** | **str** | Optional storage backend: `\"parquet\"` (default) or `\"ducklake\"`. `\"ducklake\"` requires `ducklake.metadata_pg_url` to be configured at engine boot; the engine also rejects the combo of `storage_backend: \"ducklake\"` with a saved-query source or with explicit geometry columns (both deferred to a follow-up). 
| [optional] **table_name** | **str** | Optional table_name - if not provided, derived from label | [optional] ## Example diff --git a/docs/DatabaseDefaultSchemaDecl.md b/docs/DatabaseDefaultSchemaDecl.md index 0ccc869..46e52f5 100644 --- a/docs/DatabaseDefaultSchemaDecl.md +++ b/docs/DatabaseDefaultSchemaDecl.md @@ -1,6 +1,6 @@ # DatabaseDefaultSchemaDecl -One schema declaration inside the database's default catalog at create time. Mirrors `crate::source::ManagedSchemaDecl`. Tables default to empty so callers can declare just a schema name and add tables later via the managed-tables API on the default connection. +One schema declaration inside the database's default catalog, supplied at create time. `tables` defaults to empty, so you can declare just a schema name and add tables later. ## Properties diff --git a/docs/DatabaseDefaultTableDecl.md b/docs/DatabaseDefaultTableDecl.md index 8bd348d..13b8dc9 100644 --- a/docs/DatabaseDefaultTableDecl.md +++ b/docs/DatabaseDefaultTableDecl.md @@ -1,6 +1,6 @@ # DatabaseDefaultTableDecl -One table declaration inside a default-catalog schema at database-create time. Mirrors `crate::source::ManagedTableDecl` shape so the controller can convert with a simple `.map`. +One table declaration inside a default-catalog schema, supplied at database-create time. ## Properties diff --git a/docs/DatabasesApi.md b/docs/DatabasesApi.md index d15dca2..6439c20 100644 --- a/docs/DatabasesApi.md +++ b/docs/DatabasesApi.md @@ -19,7 +19,7 @@ Method | HTTP request | Description Add schema to database default catalog -Declare a new schema (and optionally its tables) on the database's auto-created default catalog after creation. The schema becomes reachable inside the database scope (e.g. `default..` and `information_schema.schemata`) without the caller addressing the internal default connection directly. Identifiers are normalised to lowercase. 
+Declare a new schema (and optionally its tables) on the database's auto-created default catalog after creation. The schema becomes reachable inside the database scope (e.g. `default.<schema>.<table>` and `information_schema.schemata`) without the caller addressing the internal default connection directly. Identifiers are normalized to lowercase. ### Example @@ -110,7 +110,7 @@ Name | Type | Description | Notes Add table to database default catalog -Declare a new table on an existing schema of the database's default catalog after creation. The table is added empty (declared-but-unloaded) and can be populated via the managed-table load endpoint targeting the default connection. Identifiers are normalised to lowercase. +Declare a new table on an existing schema of the database's default catalog after creation. The table is added empty (declared-but-unloaded) and can be populated via the managed-table load endpoint targeting the default connection. Identifiers are normalized to lowercase. ### Example @@ -291,7 +291,7 @@ void (empty response body) Create database -Create a new database (a metadata-only grouping). A managed default catalog is auto-created and addressable inside the database as `default` (or the optional `default_catalog` name), with a `main` schema pre-declared so `default.main.<table>` works out of the box. The optional `name` is a free-form display label and is not required to be unique. Optional `default_catalog` overrides the name the default catalog answers to; it must be a valid SQL identifier and may not collide with the `hotdata`, `datasets`, or `information_schema` system catalogs. Optional `schemas` declares additional schemas/tables on the default catalog at create time; declared tables can be loaded via the standard managed-tables-load endpoint targeting `default_connection_id`. Optional `expires_at` sets when the database expires — accepts either an RFC 3339 timestamp or a relative duration suffixed with `h` (hours), `m` (minutes), or `d` (days), e.g. `24h`, `48h`, `90m`, `7d`. When omitted, the database never expires. Expiry is best-effort: the database will not be deleted before `expires_at`, but cleanup may run later than the exact timestamp. Optional `storage_backend` selects the physical backend for the default catalog — `parquet` (default) or `ducklake` (requires `ducklake.metadata_pg_url` to be configured). +Create a new database (a metadata-only grouping). A managed default catalog is auto-created and addressable inside the database as `default` (or the optional `default_catalog` name), with a `main` schema pre-declared so `default.main.<table>` works out of the box. The optional `name` is a free-form display label and is not required to be unique. Optional `default_catalog` overrides the name the default catalog answers to; it must be a valid SQL identifier and may not collide with the `hotdata`, `datasets`, or `information_schema` system catalogs. Optional `schemas` declares additional schemas/tables on the default catalog at create time; declared tables can be loaded via the standard managed-tables-load endpoint targeting `default_connection_id`. Optional `expires_at` sets when the database expires — accepts either an RFC 3339 timestamp or a relative duration suffixed with `h` (hours), `m` (minutes), or `d` (days), e.g. `24h`, `48h`, `90m`, `7d`. When omitted, the database never expires. Expiry is best-effort: the database will not be deleted before `expires_at`, but cleanup may run later than the exact timestamp. ### Example diff --git a/docs/IndexesApi.md b/docs/IndexesApi.md index 5838fc0..47c6224 100644 --- a/docs/IndexesApi.md +++ b/docs/IndexesApi.md @@ -65,7 +65,7 @@ configuration = hotdata.Configuration( with hotdata.ApiClient(configuration) as api_client: # Create an instance of the API class api_instance = hotdata.IndexesApi(api_client) - dataset_id = 'dataset_id_example' # str | Dataset identifier + dataset_id = 'dataset_id_example' # str | Dataset ID create_index_request = hotdata.CreateIndexRequest() # CreateIndexRequest | try: @@ -84,7 +84,7 @@ with hotdata.ApiClient(configuration) as api_client: Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- - **dataset_id** | **str**| Dataset identifier | + **dataset_id** | **str**| Dataset ID | **create_index_request** | [**CreateIndexRequest**](CreateIndexRequest.md)| | ### Return type @@ -156,7 +156,7 @@ configuration = hotdata.Configuration( with hotdata.ApiClient(configuration) as api_client: # Create an instance of the API class api_instance = hotdata.IndexesApi(api_client) - connection_id = 
'connection_id_example' # str | Connection identifier + connection_id = 'connection_id_example' # str | Connection ID var_schema = 'var_schema_example' # str | Schema name table = 'table_example' # str | Table name create_index_request = hotdata.CreateIndexRequest() # CreateIndexRequest | @@ -177,7 +177,7 @@ with hotdata.ApiClient(configuration) as api_client: Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- - **connection_id** | **str**| Connection identifier | + **connection_id** | **str**| Connection ID | **var_schema** | **str**| Schema name | **table** | **str**| Table name | **create_index_request** | [**CreateIndexRequest**](CreateIndexRequest.md)| | @@ -256,7 +256,7 @@ configuration = hotdata.Configuration( with hotdata.ApiClient(configuration) as api_client: # Create an instance of the API class api_instance = hotdata.IndexesApi(api_client) - dataset_id = 'dataset_id_example' # str | Dataset identifier + dataset_id = 'dataset_id_example' # str | Dataset ID index_name = 'index_name_example' # str | Index name try: @@ -273,7 +273,7 @@ with hotdata.ApiClient(configuration) as api_client: Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- - **dataset_id** | **str**| Dataset identifier | + **dataset_id** | **str**| Dataset ID | **index_name** | **str**| Index name | ### Return type @@ -341,7 +341,7 @@ configuration = hotdata.Configuration( with hotdata.ApiClient(configuration) as api_client: # Create an instance of the API class api_instance = hotdata.IndexesApi(api_client) - connection_id = 'connection_id_example' # str | Connection identifier + connection_id = 'connection_id_example' # str | Connection ID var_schema = 'var_schema_example' # str | Schema name table = 'table_example' # str | Table name index_name = 'index_name_example' # str | Index name @@ -360,7 +360,7 @@ with hotdata.ApiClient(configuration) as api_client: Name | Type | Description | Notes 
------------- | ------------- | ------------- | ------------- - **connection_id** | **str**| Connection identifier | + **connection_id** | **str**| Connection ID | **var_schema** | **str**| Schema name | **table** | **str**| Table name | **index_name** | **str**| Index name | @@ -439,7 +439,7 @@ configuration = hotdata.Configuration( with hotdata.ApiClient(configuration) as api_client: # Create an instance of the API class api_instance = hotdata.IndexesApi(api_client) - dataset_id = 'dataset_id_example' # str | Dataset identifier + dataset_id = 'dataset_id_example' # str | Dataset ID try: # List indexes on a dataset @@ -457,7 +457,7 @@ with hotdata.ApiClient(configuration) as api_client: Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- - **dataset_id** | **str**| Dataset identifier | + **dataset_id** | **str**| Dataset ID | ### Return type @@ -525,7 +525,7 @@ configuration = hotdata.Configuration( with hotdata.ApiClient(configuration) as api_client: # Create an instance of the API class api_instance = hotdata.IndexesApi(api_client) - connection_id = 'connection_id_example' # str | Connection identifier + connection_id = 'connection_id_example' # str | Connection ID var_schema = 'var_schema_example' # str | Schema name table = 'table_example' # str | Table name @@ -545,7 +545,7 @@ with hotdata.ApiClient(configuration) as api_client: Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- - **connection_id** | **str**| Connection identifier | + **connection_id** | **str**| Connection ID | **var_schema** | **str**| Schema name | **table** | **str**| Table name | diff --git a/docs/JobType.md b/docs/JobType.md index a3c9d43..3d7ada6 100644 --- a/docs/JobType.md +++ b/docs/JobType.md @@ -16,6 +16,8 @@ Background job types returned by the API. 
* `CREATE_DATASET_INDEX` (value: `'create_dataset_index'`) +* `MANAGED_LOAD` (value: `'managed_load'`) + [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/docs/LoadManagedTableRequest.md b/docs/LoadManagedTableRequest.md index 08d5549..de03667 100644 --- a/docs/LoadManagedTableRequest.md +++ b/docs/LoadManagedTableRequest.md @@ -1,6 +1,6 @@ # LoadManagedTableRequest -Request body for `POST /v1/connections/{connection_id}/schemas/{schema}/tables/{table}/loads`. Publishes a previously-uploaded parquet file as the new generation for the named managed table. `mode` is fixed to `\"replace\"` today; the field is kept in the request body so future modes (e.g. append) are an additive change. +Request body for `POST /v1/connections/{connection_id}/schemas/{schema}/tables/{table}/loads`. Publishes a previously-uploaded parquet file as the new contents of the named managed table. `mode` is fixed to `\"replace\"` today; the field is kept in the request body so future modes (e.g. append) are an additive change. ## Properties diff --git a/docs/LoadManagedTableResponse.md b/docs/LoadManagedTableResponse.md index 0131cbc..3d29b6c 100644 --- a/docs/LoadManagedTableResponse.md +++ b/docs/LoadManagedTableResponse.md @@ -6,7 +6,7 @@ Response body for `POST /v1/connections/{connection_id}/schemas/{schema}/tables/ Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- -**arrow_schema_json** | **str** | Arrow schema (JSON) parsed from the uploaded parquet footer. | +**arrow_schema_json** | **str** | Schema of the loaded table, as JSON. | **connection_id** | **str** | | **row_count** | **int** | Total rows in the published parquet file. 
| **schema_name** | **str** | | diff --git a/docs/ManagedSchemaResponse.md b/docs/ManagedSchemaResponse.md index 8f9194a..228a3f2 100644 --- a/docs/ManagedSchemaResponse.md +++ b/docs/ManagedSchemaResponse.md @@ -1,6 +1,6 @@ # ManagedSchemaResponse -Response body for a successful add-schema request. Echoes the normalised (lowercased) names so callers see exactly what was persisted. +Response body for a successful add-schema request. Echoes the normalized (lowercased) names so callers see exactly what was persisted. ## Properties diff --git a/docs/QueryApi.md b/docs/QueryApi.md index 9f8340e..bd6b254 100644 --- a/docs/QueryApi.md +++ b/docs/QueryApi.md @@ -106,6 +106,7 @@ Name | Type | Description | Notes **404** | Database not found | - | **429** | Too many concurrent queries; retry after the Retry-After delay | - | **500** | Internal server error | - | +**503** | Result store temporarily unavailable (a truncated result could not be persisted); retry after the Retry-After delay | - | [[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) diff --git a/docs/QueryResponse.md b/docs/QueryResponse.md index 1eb521a..555bc29 100644 --- a/docs/QueryResponse.md +++ b/docs/QueryResponse.md @@ -9,14 +9,14 @@ Name | Type | Description | Notes **columns** | **List[str]** | | **execution_time_ms** | **int** | | **nullable** | **List[bool]** | Nullable flags for each column (parallel to columns vec). True if the column allows NULL values, false if NOT NULL. | -**preview_row_count** | **int** | Number of rows in *this* response body (`rows.len()`). Always present. For a large result this is a bounded preview, not the grand total — see `total_row_count` and `truncated` (#640). | +**preview_row_count** | **int** | Number of rows in *this* response body. Always present. 
For a large result this is a bounded preview, not the grand total — see `total_row_count` and `truncated`. | **query_run_id** | **str** | Unique identifier for the query run record (qrun...). | -**result_id** | **str** | Unique identifier for retrieving this result via GET /results/{id}. Null if catalog registration failed (see `warning` field for details). When non-null, the result is being persisted asynchronously. | [optional] -**row_count** | **int** | **Deprecated** — use `preview_row_count` (rows in this body) and `total_row_count` (grand total) instead. Retained for backward compatibility and currently always equal to `preview_row_count`; it will be removed in a future release once clients migrate to the count fields below (#640). | +**result_id** | **str** | Unique identifier for retrieving this result via GET /results/{id}. When non-null, the result is being persisted asynchronously. Null only when the result fit entirely in this response (`truncated: false`) but could not be persisted for later retrieval — see the `warning` field. A `truncated: true` response ALWAYS carries a non-null, resolvable `result_id` (#640 F1): a truncated result that cannot be persisted fails the request with a retryable HTTP 503 (`PERSISTENCE_UNAVAILABLE`, with a `Retry-After` header) rather than returning a partial body with a dead ticket. | [optional] +**row_count** | **int** | **Deprecated** — use `preview_row_count` (rows in this body) and `total_row_count` (grand total) instead. Retained as a back-compat alias and always equal to `preview_row_count`; for a truncated result it is the preview count, *not* the grand total — read `total_row_count` for that. Will be removed in a future release once clients migrate. | **rows** | **List[List[object]]** | Array of rows, where each row is an array of column values. Values can be strings, numbers, booleans, or null. | -**total_row_count** | **int** | Grand total rows in the full result. 
Present (and equal to `preview_row_count`) when the whole result fit in this response; `null` while a truncated result is still being persisted. When `null`, read the authoritative total from `GET /v1/query-runs/{id}` (`row_count`) or the `X-Total-Row-Count` header on `GET /v1/results/{id}` (#640). | [optional] -**truncated** | **bool** | True when `rows` is a bounded preview of a larger result. Fetch the full result via `result_id` (#640). Always `false` until bounded streaming is enabled; clients should still branch on it so no code change is needed when truncation goes live. | -**warning** | **str** | Warning message if result persistence could not be initiated. When present, `result_id` will be null and the result cannot be retrieved later. The query results are still returned in this response. | [optional] +**total_row_count** | **int** | Grand total rows in the full result. Present (and equal to `preview_row_count`) when the whole result fit in this response; `null` while a truncated result is still being persisted. When `null`, read the authoritative total from `GET /v1/query-runs/{id}` (`row_count`) or the `X-Total-Row-Count` header on `GET /v1/results/{id}`. | [optional] +**truncated** | **bool** | True when `rows` is a bounded preview of a larger result. Fetch the full result via `result_id`. | +**warning** | **str** | Warning message if result persistence could not be initiated. Present only when the full result is returned inline (`truncated: false`) but could not be persisted: `result_id` is then null and the result cannot be re-fetched later, though every row is in this response. A truncated result never carries a warning — if it cannot be persisted the request fails with a retryable HTTP 503 (`PERSISTENCE_UNAVAILABLE`, with a `Retry-After` header) instead (#640 F1). 
| [optional] ## Example diff --git a/docs/QueryRunInfo.md b/docs/QueryRunInfo.md index 2fb0b39..e2b1cf2 100644 --- a/docs/QueryRunInfo.md +++ b/docs/QueryRunInfo.md @@ -6,6 +6,7 @@ Single query run for listing Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- +**bytes_scanned** | **int** | Total bytes of table data read from storage to run this query. `null` when the query touches no table at all (for example a constant expression like `SELECT 1`). May be `0` when the query reads a table but not its row data — for example a row count served from table statistics. | [optional] **completed_at** | **datetime** | | [optional] **created_at** | **datetime** | | **error_message** | **str** | | [optional] @@ -13,6 +14,7 @@ Name | Type | Description | Notes **id** | **str** | | **result_id** | **str** | | [optional] **row_count** | **int** | | [optional] +**rows_scanned** | **int** | Total rows read from storage to run this query, before any filtering or aggregation. Distinct from `row_count`, which is how many rows the query returned. `null` when the query reads no table data from storage. | [optional] **saved_query_id** | **str** | | [optional] **saved_query_version** | **int** | | [optional] **server_processing_ms** | **int** | Total server-side processing time for this query (milliseconds). Measured from query start to result ready. Includes SQL execution, task spawning, and result preparation. Does not include network transit. Populated for all completed query runs (sync and async). | [optional] @@ -21,7 +23,7 @@ Name | Type | Description | Notes **sql_text** | **str** | | **status** | **str** | | **trace_id** | **str** | | [optional] -**user_public_id** | **str** | Caller identity derived from the Authorization Bearer token (SHA-256 hash). Format: `user_{first_10_hex_chars}`. Mirrors the webapp's `user_public_id_from_auth_header`. 
| [optional] +**user_public_id** | **str** | Caller identity derived from the Authorization Bearer token (SHA-256 hash). Format: `user_{first_10_hex_chars}`. | [optional] **warning_message** | **str** | | [optional] ## Example diff --git a/docs/ResultsApi.md b/docs/ResultsApi.md index 2348835..e1ade8d 100644 --- a/docs/ResultsApi.md +++ b/docs/ResultsApi.md @@ -26,9 +26,9 @@ Retrieve a persisted query result by ID. The response format for the `ready` sta | `failed` | 409 `application/json` `{status, result_id, error_message}` | | not found | 404 `application/json` (`ApiErrorResponse`) | -`?format=` accepts `arrow`, `json`, `csv`, `md`, `parquet` and takes precedence over `Accept`. `markdown` is accepted as a runtime alias for `md` (the OpenAPI enum lists only `md` to keep the SDK shape clean). Use `?offset=N&limit=M` to slice the result; `offset` defaults to 0 and `limit` is unbounded by default. Both must be non-negative; invalid values return 400. When a finite `limit` doesn't reach the end of the result, a `Link` header with `rel="next"` points at the following page. `?offset`/`?limit` are ignored for `format=parquet` since that path returns the underlying file unchanged. +`?format=` accepts `arrow`, `json`, `csv`, `md`, `parquet` and takes precedence over `Accept`. `markdown` is accepted as a runtime alias for `md`. Use `?offset=N&limit=M` to slice the result; `offset` defaults to 0 and `limit` is unbounded by default. Both must be non-negative; invalid values return 400. When a finite `limit` doesn't reach the end of the result, a `Link` header with `rel="next"` points at the following page. `?offset`/`?limit` are ignored for `format=parquet` since that path returns the underlying file unchanged. -Ready responses (Arrow, CSV, Markdown, JSON) carry `X-Total-Row-Count` (full result row count from parquet metadata, independent of offset/limit). 
The streaming paths run end-to-end with no spawned task between the parquet reader and the wire — clients can disconnect at any time and the server stops reading. +Ready responses (Arrow, CSV, Markdown, JSON) carry `X-Total-Row-Count` (the full result row count, independent of offset/limit). Responses are streamed end-to-end, so a client can disconnect at any time and the server stops reading. IEEE special floats (`±Inf`, `NaN`) have no canonical JSON representation. For cross-format consistency the JSON, CSV, and Markdown paths emit them as `null` / empty cells, and JSON `nullable[]` is widened to match. The Arrow IPC and Parquet bodies are binary round-trip formats and preserve the raw IEEE values; callers cross-checking a result across CSV and Parquet should not byte-compare those slots. @@ -80,7 +80,7 @@ with hotdata.ApiClient(configuration) as api_client: id = 'id_example' # str | Result ID offset = 56 # int | Rows to skip (default: 0) (optional) limit = 56 # int | Maximum rows to return (default: unbounded) (optional) - format = hotdata.ResultsFormatQuery() # ResultsFormatQuery | `arrow`, `json`, `csv`, `md`, or `parquet` — overrides the `Accept` header. `markdown` is also accepted at runtime as an alias for `md`, but is not in the OpenAPI enum to keep the SDK shape clean. (optional) + format = hotdata.ResultsFormatQuery() # ResultsFormatQuery | `arrow`, `json`, `csv`, `md`, or `parquet` — overrides the `Accept` header. `markdown` is also accepted at runtime as an alias for `md`. (optional) try: # Get result @@ -101,7 +101,7 @@ Name | Type | Description | Notes **id** | **str**| Result ID | **offset** | **int**| Rows to skip (default: 0) | [optional] **limit** | **int**| Maximum rows to return (default: unbounded) | [optional] - **format** | [**ResultsFormatQuery**](.md)| `arrow`, `json`, `csv`, `md`, or `parquet` — overrides the `Accept` header. 
`markdown` is also accepted at runtime as an alias for `md`, but is not in the OpenAPI enum to keep the SDK shape clean. | [optional] + **format** | [**ResultsFormatQuery**](.md)| `arrow`, `json`, `csv`, `md`, or `parquet` — overrides the `Accept` header. `markdown` is also accepted at runtime as an alias for `md`. | [optional] ### Return type @@ -120,7 +120,7 @@ Name | Type | Description | Notes | Status code | Description | Response headers | |-------------|-------------|------------------| -**200** | Result data. The body depends on the negotiated format: JSON callers receive `GetResultResponse`; Arrow callers receive an Arrow IPC stream; CSV callers receive comma-separated text (LF-terminated, double-quote escaped, RFC 4180-style quoting but not RFC 4180-strict on line endings); Markdown callers receive a single GitHub-flavored pipe table; Parquet callers receive the raw parquet bytes from object storage. Non-finite floats (`±Inf`, `NaN`) render as `null` (JSON) or empty cells (CSV, Markdown) for cross-format consistency. `Accept` is treated as a flat content-type list — `q=` quality values are ignored; use `?format=` to disambiguate. | * Link - RFC 5988 `Link` header with `rel=\"next\"` pointing at the next page when a finite `limit` does not reach the end of the result. <br>
* X-Total-Row-Count - Total rows in the full result, ignoring offset/limit. Present only when status is `ready`. <br>
| +**200** | Result data. The body depends on the negotiated format: JSON callers receive `GetResultResponse`; Arrow callers receive an Arrow IPC stream; CSV callers receive comma-separated text (LF-terminated, double-quote escaped, RFC 4180-style quoting but not RFC 4180-strict on line endings); Markdown callers receive a single GitHub-flavored pipe table; Parquet callers receive the raw parquet bytes, served as-is. Non-finite floats (`±Inf`, `NaN`) render as `null` (JSON) or empty cells (CSV, Markdown) for cross-format consistency. `Accept` is treated as a flat content-type list — `q=` quality values are ignored; use `?format=` to disambiguate. | * Link - RFC 5988 `Link` header with `rel=\"next\"` pointing at the next page when a finite `limit` does not reach the end of the result. <br>
* X-Total-Row-Count - Total rows in the full result, ignoring offset/limit. Present only when status is `ready`. <br>
| **202** | Result is still being computed (`pending` or `processing`). Poll the same URL. | * Retry-After - Suggested seconds before the next poll. <br>
| **400** | Invalid offset, limit, or format. | - | **404** | Result not found. | - | diff --git a/docs/ResultsFormatQuery.md b/docs/ResultsFormatQuery.md index 5d10ce8..61b3afa 100644 --- a/docs/ResultsFormatQuery.md +++ b/docs/ResultsFormatQuery.md @@ -1,6 +1,6 @@ # ResultsFormatQuery -Schema for the `?format=` query parameter on `GET /v1/results/{id}`. Documents the canonical values that SDKs should treat as a closed set (`arrow`, `json`, `csv`, `md`, `parquet`). The runtime handler's negotiator (`negotiate_results_format`) additionally accepts `markdown` as an alias for `md` — case-insensitive, with unknown values falling through to the `Accept` header — but `markdown` is intentionally NOT listed in this enum so SDK generators emit a single canonical `Markdown` (or equivalent) variant rather than two distinct ones for the same logical format. +The `?format=` query parameter on `GET /v1/results/{id}`. One of `arrow`, `json`, `csv`, `md`, or `parquet`. ## Enum diff --git a/hotdata/api/connections_api.py b/hotdata/api/connections_api.py index 5d8b30a..1eb0b0c 100644 --- a/hotdata/api/connections_api.py +++ b/hotdata/api/connections_api.py @@ -69,7 +69,7 @@ def add_managed_schema( ) -> ManagedSchemaResponse: """Add managed schema - Declare a new schema (and optionally its tables) on an existing managed catalog after creation. The schema is added to the connection's declaration; declared tables can then be populated via the managed-table load endpoint. Only valid against connections whose source type is `managed`. Identifiers are normalised to lowercase. + Declare a new schema (and optionally its tables) on an existing managed catalog after creation. The schema is added to the connection's declaration; declared tables can then be populated via the managed-table load endpoint. Only valid against connections whose source type is `managed`. Identifiers are normalized to lowercase. 
:param connection_id: Connection ID (required) :type connection_id: str @@ -143,7 +143,7 @@ def add_managed_schema_with_http_info( ) -> ApiResponse[ManagedSchemaResponse]: """Add managed schema - Declare a new schema (and optionally its tables) on an existing managed catalog after creation. The schema is added to the connection's declaration; declared tables can then be populated via the managed-table load endpoint. Only valid against connections whose source type is `managed`. Identifiers are normalised to lowercase. + Declare a new schema (and optionally its tables) on an existing managed catalog after creation. The schema is added to the connection's declaration; declared tables can then be populated via the managed-table load endpoint. Only valid against connections whose source type is `managed`. Identifiers are normalized to lowercase. :param connection_id: Connection ID (required) :type connection_id: str @@ -217,7 +217,7 @@ def add_managed_schema_without_preload_content( ) -> RESTResponseType: """Add managed schema - Declare a new schema (and optionally its tables) on an existing managed catalog after creation. The schema is added to the connection's declaration; declared tables can then be populated via the managed-table load endpoint. Only valid against connections whose source type is `managed`. Identifiers are normalised to lowercase. + Declare a new schema (and optionally its tables) on an existing managed catalog after creation. The schema is added to the connection's declaration; declared tables can then be populated via the managed-table load endpoint. Only valid against connections whose source type is `managed`. Identifiers are normalized to lowercase. :param connection_id: Connection ID (required) :type connection_id: str @@ -369,7 +369,7 @@ def add_managed_table( ) -> ManagedTableResponse: """Add managed table - Declare a new table on an existing schema of a managed catalog after creation. 
The table is added empty (declared-but-unloaded) and can be populated via the managed-table load endpoint. Only valid against connections whose source type is `managed`. Identifiers are normalised to lowercase. + Declare a new table on an existing schema of a managed catalog after creation. The table is added empty (declared-but-unloaded) and can be populated via the managed-table load endpoint. Only valid against connections whose source type is `managed`. Identifiers are normalized to lowercase. :param connection_id: Connection ID (required) :type connection_id: str @@ -447,7 +447,7 @@ def add_managed_table_with_http_info( ) -> ApiResponse[ManagedTableResponse]: """Add managed table - Declare a new table on an existing schema of a managed catalog after creation. The table is added empty (declared-but-unloaded) and can be populated via the managed-table load endpoint. Only valid against connections whose source type is `managed`. Identifiers are normalised to lowercase. + Declare a new table on an existing schema of a managed catalog after creation. The table is added empty (declared-but-unloaded) and can be populated via the managed-table load endpoint. Only valid against connections whose source type is `managed`. Identifiers are normalized to lowercase. :param connection_id: Connection ID (required) :type connection_id: str @@ -525,7 +525,7 @@ def add_managed_table_without_preload_content( ) -> RESTResponseType: """Add managed table - Declare a new table on an existing schema of a managed catalog after creation. The table is added empty (declared-but-unloaded) and can be populated via the managed-table load endpoint. Only valid against connections whose source type is `managed`. Identifiers are normalised to lowercase. + Declare a new table on an existing schema of a managed catalog after creation. The table is added empty (declared-but-unloaded) and can be populated via the managed-table load endpoint. 
Only valid against connections whose source type is `managed`. Identifiers are normalized to lowercase. :param connection_id: Connection ID (required) :type connection_id: str @@ -1497,7 +1497,7 @@ def delete_managed_table( ) -> None: """Delete managed table - Delete a single managed-catalog table. The catalog row is removed and the backing parquet file (if any) is scheduled for deletion. Only valid against connections whose source type is `managed`. + Delete a single managed-catalog table. The table and its data are removed. Only valid against connections whose source type is `managed`. :param connection_id: Connection ID (required) :type connection_id: str @@ -1574,7 +1574,7 @@ def delete_managed_table_with_http_info( ) -> ApiResponse[None]: """Delete managed table - Delete a single managed-catalog table. The catalog row is removed and the backing parquet file (if any) is scheduled for deletion. Only valid against connections whose source type is `managed`. + Delete a single managed-catalog table. The table and its data are removed. Only valid against connections whose source type is `managed`. :param connection_id: Connection ID (required) :type connection_id: str @@ -1651,7 +1651,7 @@ def delete_managed_table_without_preload_content( ) -> RESTResponseType: """Delete managed table - Delete a single managed-catalog table. The catalog row is removed and the backing parquet file (if any) is scheduled for deletion. Only valid against connections whose source type is `managed`. + Delete a single managed-catalog table. The table and its data are removed. Only valid against connections whose source type is `managed`. :param connection_id: Connection ID (required) :type connection_id: str @@ -2603,7 +2603,7 @@ def load_managed_table( ) -> LoadManagedTableResponse: """Load managed table from upload - Publish a previously-uploaded parquet file as the new generation of a managed table. The upload must reference a parquet file (verified by magic bytes). 
Only `mode = \"replace\"` is supported. Concurrent loads against the same upload return 409. + Publish a previously-uploaded parquet file as the new contents of a managed table. The upload must reference a parquet file. Only `mode = \"replace\"` is supported. Concurrent loads against the same upload return 409. :param connection_id: Connection ID (required) :type connection_id: str @@ -2685,7 +2685,7 @@ def load_managed_table_with_http_info( ) -> ApiResponse[LoadManagedTableResponse]: """Load managed table from upload - Publish a previously-uploaded parquet file as the new generation of a managed table. The upload must reference a parquet file (verified by magic bytes). Only `mode = \"replace\"` is supported. Concurrent loads against the same upload return 409. + Publish a previously-uploaded parquet file as the new contents of a managed table. The upload must reference a parquet file. Only `mode = \"replace\"` is supported. Concurrent loads against the same upload return 409. :param connection_id: Connection ID (required) :type connection_id: str @@ -2767,7 +2767,7 @@ def load_managed_table_without_preload_content( ) -> RESTResponseType: """Load managed table from upload - Publish a previously-uploaded parquet file as the new generation of a managed table. The upload must reference a parquet file (verified by magic bytes). Only `mode = \"replace\"` is supported. Concurrent loads against the same upload return 409. + Publish a previously-uploaded parquet file as the new contents of a managed table. The upload must reference a parquet file. Only `mode = \"replace\"` is supported. Concurrent loads against the same upload return 409. 
:param connection_id: Connection ID (required) :type connection_id: str diff --git a/hotdata/api/databases_api.py b/hotdata/api/databases_api.py index 4dacdef..a2a2b6b 100644 --- a/hotdata/api/databases_api.py +++ b/hotdata/api/databases_api.py @@ -66,7 +66,7 @@ def add_database_schema( ) -> ManagedSchemaResponse: """Add schema to database default catalog - Declare a new schema (and optionally its tables) on the database's auto-created default catalog after creation. The schema becomes reachable inside the database scope (e.g. `default.<schema>.<table>` and `information_schema.schemata`) without the caller addressing the internal default connection directly. Identifiers are normalised to lowercase. + Declare a new schema (and optionally its tables) on the database's auto-created default catalog after creation. The schema becomes reachable inside the database scope (e.g. `default.<schema>.<table>` and `information_schema.schemata`) without the caller addressing the internal default connection directly. Identifiers are normalized to lowercase. :param database_id: Database ID (required) :type database_id: str @@ -140,7 +140,7 @@ def add_database_schema_with_http_info( ) -> ApiResponse[ManagedSchemaResponse]: """Add schema to database default catalog - Declare a new schema (and optionally its tables) on the database's auto-created default catalog after creation. The schema becomes reachable inside the database scope (e.g. `default.<schema>.<table>` and `information_schema.schemata`) without the caller addressing the internal default connection directly. Identifiers are normalised to lowercase. + Declare a new schema (and optionally its tables) on the database's auto-created default catalog after creation. The schema becomes reachable inside the database scope (e.g. `default.<schema>.<table>` and `information_schema.schemata`) without the caller addressing the internal default connection directly. Identifiers are normalized to lowercase. :param database_id: Database ID (required) :type database_id: str @@ -214,7 +214,7 @@ def add_database_schema_without_preload_content( ) -> RESTResponseType: """Add schema to database default catalog - Declare a new schema (and optionally its tables) on the database's auto-created default catalog after creation. The schema becomes reachable inside the database scope (e.g. `default.<schema>.<table>` and `information_schema.schemata`) without the caller addressing the internal default connection directly. Identifiers are normalised to lowercase. + Declare a new schema (and optionally its tables) on the database's auto-created default catalog after creation. The schema becomes reachable inside the database scope (e.g. `default.<schema>.<table>` and `information_schema.schemata`) without the caller addressing the internal default connection directly. Identifiers are normalized to lowercase. :param database_id: Database ID (required) :type database_id: str @@ -366,7 +366,7 @@ def add_database_table( ) -> ManagedTableResponse: """Add table to database default catalog - Declare a new table on an existing schema of the database's default catalog after creation. The table is added empty (declared-but-unloaded) and can be populated via the managed-table load endpoint targeting the default connection. Identifiers are normalised to lowercase. + Declare a new table on an existing schema of the database's default catalog after creation. The table is added empty (declared-but-unloaded) and can be populated via the managed-table load endpoint targeting the default connection. Identifiers are normalized to lowercase. :param database_id: Database ID (required) :type database_id: str @@ -444,7 +444,7 @@ def add_database_table_with_http_info( ) -> ApiResponse[ManagedTableResponse]: """Add table to database default catalog - Declare a new table on an existing schema of the database's default catalog after creation. The table is added empty (declared-but-unloaded) and can be populated via the managed-table load endpoint targeting the default connection. Identifiers are normalised to lowercase. + Declare a new table on an existing schema of the database's default catalog after creation. The table is added empty (declared-but-unloaded) and can be populated via the managed-table load endpoint targeting the default connection. Identifiers are normalized to lowercase. :param database_id: Database ID (required) :type database_id: str @@ -522,7 +522,7 @@ def add_database_table_without_preload_content( ) -> RESTResponseType: """Add table to database default catalog - Declare a new table on an existing schema of the database's default catalog after creation.
The table is added empty (declared-but-unloaded) and can be populated via the managed-table load endpoint targeting the default connection. Identifiers are normalised to lowercase. + Declare a new table on an existing schema of the database's default catalog after creation. The table is added empty (declared-but-unloaded) and can be populated via the managed-table load endpoint targeting the default connection. Identifiers are normalized to lowercase. :param database_id: Database ID (required) :type database_id: str @@ -977,7 +977,7 @@ def create_database( ) -> CreateDatabaseResponse: """Create database - Create a new database (a metadata-only grouping). A managed default catalog is auto-created and addressable inside the database as `default` (or the optional `default_catalog` name), with a `main` schema pre-declared so `default.main.<table>` works out of the box. The optional `name` is a free-form display label and is not required to be unique. Optional `default_catalog` overrides the name the default catalog answers to; it must be a valid SQL identifier and may not collide with the `hotdata`, `datasets`, or `information_schema` system catalogs. Optional `schemas` declares additional schemas/tables on the default catalog at create time; declared tables can be loaded via the standard managed-tables-load endpoint targeting `default_connection_id`. Optional `expires_at` sets when the database expires — accepts either an RFC 3339 timestamp or a relative duration suffixed with `h` (hours), `m` (minutes), or `d` (days), e.g. `24h`, `48h`, `90m`, `7d`. When omitted, the database never expires. Expiry is best-effort: the database will not be deleted before `expires_at`, but cleanup may run later than the exact timestamp. Optional `storage_backend` selects the physical backend for the default catalog — `parquet` (default) or `ducklake` (requires `ducklake.metadata_pg_url` to be configured). + Create a new database (a metadata-only grouping). A managed default catalog is auto-created and addressable inside the database as `default` (or the optional `default_catalog` name), with a `main` schema pre-declared so `default.main.<table>` works out of the box. The optional `name` is a free-form display label and is not required to be unique. Optional `default_catalog` overrides the name the default catalog answers to; it must be a valid SQL identifier and may not collide with the `hotdata`, `datasets`, or `information_schema` system catalogs. Optional `schemas` declares additional schemas/tables on the default catalog at create time; declared tables can be loaded via the standard managed-tables-load endpoint targeting `default_connection_id`. Optional `expires_at` sets when the database expires — accepts either an RFC 3339 timestamp or a relative duration suffixed with `h` (hours), `m` (minutes), or `d` (days), e.g. `24h`, `48h`, `90m`, `7d`. When omitted, the database never expires. Expiry is best-effort: the database will not be deleted before `expires_at`, but cleanup may run later than the exact timestamp. :param create_database_request: (required) :type create_database_request: CreateDatabaseRequest @@ -1046,7 +1046,7 @@ def create_database_with_http_info( ) -> ApiResponse[CreateDatabaseResponse]: """Create database - Create a new database (a metadata-only grouping). A managed default catalog is auto-created and addressable inside the database as `default` (or the optional `default_catalog` name), with a `main` schema pre-declared so `default.main.<table>` works out of the box. The optional `name` is a free-form display label and is not required to be unique. Optional `default_catalog` overrides the name the default catalog answers to; it must be a valid SQL identifier and may not collide with the `hotdata`, `datasets`, or `information_schema` system catalogs. Optional `schemas` declares additional schemas/tables on the default catalog at create time; declared tables can be loaded via the standard managed-tables-load endpoint targeting `default_connection_id`. Optional `expires_at` sets when the database expires — accepts either an RFC 3339 timestamp or a relative duration suffixed with `h` (hours), `m` (minutes), or `d` (days), e.g. `24h`, `48h`, `90m`, `7d`. When omitted, the database never expires. Expiry is best-effort: the database will not be deleted before `expires_at`, but cleanup may run later than the exact timestamp. Optional `storage_backend` selects the physical backend for the default catalog — `parquet` (default) or `ducklake` (requires `ducklake.metadata_pg_url` to be configured). + Create a new database (a metadata-only grouping). A managed default catalog is auto-created and addressable inside the database as `default` (or the optional `default_catalog` name), with a `main` schema pre-declared so `default.main.<table>` works out of the box. The optional `name` is a free-form display label and is not required to be unique. Optional `default_catalog` overrides the name the default catalog answers to; it must be a valid SQL identifier and may not collide with the `hotdata`, `datasets`, or `information_schema` system catalogs. Optional `schemas` declares additional schemas/tables on the default catalog at create time; declared tables can be loaded via the standard managed-tables-load endpoint targeting `default_connection_id`. Optional `expires_at` sets when the database expires — accepts either an RFC 3339 timestamp or a relative duration suffixed with `h` (hours), `m` (minutes), or `d` (days), e.g. `24h`, `48h`, `90m`, `7d`. When omitted, the database never expires. Expiry is best-effort: the database will not be deleted before `expires_at`, but cleanup may run later than the exact timestamp. :param create_database_request: (required) :type create_database_request: CreateDatabaseRequest @@ -1115,7 +1115,7 @@ def create_database_without_preload_content( ) -> RESTResponseType: """Create database - Create a new database (a metadata-only grouping). A managed default catalog is auto-created and addressable inside the database as `default` (or the optional `default_catalog` name), with a `main` schema pre-declared so `default.main.<table>` works out of the box. The optional `name` is a free-form display label and is not required to be unique. Optional `default_catalog` overrides the name the default catalog answers to; it must be a valid SQL identifier and may not collide with the `hotdata`, `datasets`, or `information_schema` system catalogs. Optional `schemas` declares additional schemas/tables on the default catalog at create time; declared tables can be loaded via the standard managed-tables-load endpoint targeting `default_connection_id`. Optional `expires_at` sets when the database expires — accepts either an RFC 3339 timestamp or a relative duration suffixed with `h` (hours), `m` (minutes), or `d` (days), e.g. `24h`, `48h`, `90m`, `7d`. When omitted, the database never expires. Expiry is best-effort: the database will not be deleted before `expires_at`, but cleanup may run later than the exact timestamp. Optional `storage_backend` selects the physical backend for the default catalog — `parquet` (default) or `ducklake` (requires `ducklake.metadata_pg_url` to be configured). + Create a new database (a metadata-only grouping). A managed default catalog is auto-created and addressable inside the database as `default` (or the optional `default_catalog` name), with a `main` schema pre-declared so `default.main.<table>` works out of the box. The optional `name` is a free-form display label and is not required to be unique. Optional `default_catalog` overrides the name the default catalog answers to; it must be a valid SQL identifier and may not collide with the `hotdata`, `datasets`, or `information_schema` system catalogs. Optional `schemas` declares additional schemas/tables on the default catalog at create time; declared tables can be loaded via the standard managed-tables-load endpoint targeting `default_connection_id`. Optional `expires_at` sets when the database expires — accepts either an RFC 3339 timestamp or a relative duration suffixed with `h` (hours), `m` (minutes), or `d` (days), e.g. `24h`, `48h`, `90m`, `7d`. When omitted, the database never expires. Expiry is best-effort: the database will not be deleted before `expires_at`, but cleanup may run later than the exact timestamp. :param create_database_request: (required) :type create_database_request: CreateDatabaseRequest diff --git a/hotdata/api/indexes_api.py b/hotdata/api/indexes_api.py index 9e3113c..971a32a 100644 --- a/hotdata/api/indexes_api.py +++ b/hotdata/api/indexes_api.py @@ -45,7 +45,7 @@ def __init__(self, api_client=None) -> None: @validate_call def create_dataset_index( self, - dataset_id: Annotated[StrictStr, Field(description="Dataset identifier")], + dataset_id: Annotated[StrictStr, Field(description="Dataset ID")], create_index_request: CreateIndexRequest, _request_timeout: Union[ None, @@ -64,7 +64,7 @@ def create_dataset_index( Create a sorted, BM25, or vector index on a dataset.
- :param dataset_id: Dataset identifier (required) + :param dataset_id: Dataset ID (required) :type dataset_id: str :param create_index_request: (required) :type create_index_request: CreateIndexRequest @@ -119,7 +119,7 @@ def create_dataset_index( @validate_call def create_dataset_index_with_http_info( self, - dataset_id: Annotated[StrictStr, Field(description="Dataset identifier")], + dataset_id: Annotated[StrictStr, Field(description="Dataset ID")], create_index_request: CreateIndexRequest, _request_timeout: Union[ None, @@ -138,7 +138,7 @@ def create_dataset_index_with_http_info( Create a sorted, BM25, or vector index on a dataset. - :param dataset_id: Dataset identifier (required) + :param dataset_id: Dataset ID (required) :type dataset_id: str :param create_index_request: (required) :type create_index_request: CreateIndexRequest @@ -193,7 +193,7 @@ def create_dataset_index_with_http_info( @validate_call def create_dataset_index_without_preload_content( self, - dataset_id: Annotated[StrictStr, Field(description="Dataset identifier")], + dataset_id: Annotated[StrictStr, Field(description="Dataset ID")], create_index_request: CreateIndexRequest, _request_timeout: Union[ None, @@ -212,7 +212,7 @@ def create_dataset_index_without_preload_content( Create a sorted, BM25, or vector index on a dataset. 
- :param dataset_id: Dataset identifier (required) + :param dataset_id: Dataset ID (required) :type dataset_id: str :param create_index_request: (required) :type create_index_request: CreateIndexRequest @@ -345,7 +345,7 @@ def _create_dataset_index_serialize( @validate_call def create_index( self, - connection_id: Annotated[StrictStr, Field(description="Connection identifier")], + connection_id: Annotated[StrictStr, Field(description="Connection ID")], var_schema: Annotated[StrictStr, Field(description="Schema name")], table: Annotated[StrictStr, Field(description="Table name")], create_index_request: CreateIndexRequest, @@ -366,7 +366,7 @@ def create_index( Create a sorted or BM25 full-text index on a cached table. - :param connection_id: Connection identifier (required) + :param connection_id: Connection ID (required) :type connection_id: str :param var_schema: Schema name (required) :type var_schema: str @@ -427,7 +427,7 @@ def create_index( @validate_call def create_index_with_http_info( self, - connection_id: Annotated[StrictStr, Field(description="Connection identifier")], + connection_id: Annotated[StrictStr, Field(description="Connection ID")], var_schema: Annotated[StrictStr, Field(description="Schema name")], table: Annotated[StrictStr, Field(description="Table name")], create_index_request: CreateIndexRequest, @@ -448,7 +448,7 @@ def create_index_with_http_info( Create a sorted or BM25 full-text index on a cached table. 
- :param connection_id: Connection identifier (required) + :param connection_id: Connection ID (required) :type connection_id: str :param var_schema: Schema name (required) :type var_schema: str @@ -509,7 +509,7 @@ def create_index_with_http_info( @validate_call def create_index_without_preload_content( self, - connection_id: Annotated[StrictStr, Field(description="Connection identifier")], + connection_id: Annotated[StrictStr, Field(description="Connection ID")], var_schema: Annotated[StrictStr, Field(description="Schema name")], table: Annotated[StrictStr, Field(description="Table name")], create_index_request: CreateIndexRequest, @@ -530,7 +530,7 @@ def create_index_without_preload_content( Create a sorted or BM25 full-text index on a cached table. - :param connection_id: Connection identifier (required) + :param connection_id: Connection ID (required) :type connection_id: str :param var_schema: Schema name (required) :type var_schema: str @@ -674,7 +674,7 @@ def _create_index_serialize( @validate_call def delete_dataset_index( self, - dataset_id: Annotated[StrictStr, Field(description="Dataset identifier")], + dataset_id: Annotated[StrictStr, Field(description="Dataset ID")], index_name: Annotated[StrictStr, Field(description="Index name")], _request_timeout: Union[ None, @@ -693,7 +693,7 @@ def delete_dataset_index( Delete a specific index from a dataset. 
- :param dataset_id: Dataset identifier (required) + :param dataset_id: Dataset ID (required) :type dataset_id: str :param index_name: Index name (required) :type index_name: str @@ -746,7 +746,7 @@ def delete_dataset_index( @validate_call def delete_dataset_index_with_http_info( self, - dataset_id: Annotated[StrictStr, Field(description="Dataset identifier")], + dataset_id: Annotated[StrictStr, Field(description="Dataset ID")], index_name: Annotated[StrictStr, Field(description="Index name")], _request_timeout: Union[ None, @@ -765,7 +765,7 @@ def delete_dataset_index_with_http_info( Delete a specific index from a dataset. - :param dataset_id: Dataset identifier (required) + :param dataset_id: Dataset ID (required) :type dataset_id: str :param index_name: Index name (required) :type index_name: str @@ -818,7 +818,7 @@ def delete_dataset_index_with_http_info( @validate_call def delete_dataset_index_without_preload_content( self, - dataset_id: Annotated[StrictStr, Field(description="Dataset identifier")], + dataset_id: Annotated[StrictStr, Field(description="Dataset ID")], index_name: Annotated[StrictStr, Field(description="Index name")], _request_timeout: Union[ None, @@ -837,7 +837,7 @@ def delete_dataset_index_without_preload_content( Delete a specific index from a dataset. 
- :param dataset_id: Dataset identifier (required) + :param dataset_id: Dataset ID (required) :type dataset_id: str :param index_name: Index name (required) :type index_name: str @@ -955,7 +955,7 @@ def _delete_dataset_index_serialize( @validate_call def delete_index( self, - connection_id: Annotated[StrictStr, Field(description="Connection identifier")], + connection_id: Annotated[StrictStr, Field(description="Connection ID")], var_schema: Annotated[StrictStr, Field(description="Schema name")], table: Annotated[StrictStr, Field(description="Table name")], index_name: Annotated[StrictStr, Field(description="Index name")], @@ -976,7 +976,7 @@ def delete_index( Delete a specific index from a cached table. - :param connection_id: Connection identifier (required) + :param connection_id: Connection ID (required) :type connection_id: str :param var_schema: Schema name (required) :type var_schema: str @@ -1036,7 +1036,7 @@ def delete_index( @validate_call def delete_index_with_http_info( self, - connection_id: Annotated[StrictStr, Field(description="Connection identifier")], + connection_id: Annotated[StrictStr, Field(description="Connection ID")], var_schema: Annotated[StrictStr, Field(description="Schema name")], table: Annotated[StrictStr, Field(description="Table name")], index_name: Annotated[StrictStr, Field(description="Index name")], @@ -1057,7 +1057,7 @@ def delete_index_with_http_info( Delete a specific index from a cached table. 
- :param connection_id: Connection identifier (required) + :param connection_id: Connection ID (required) :type connection_id: str :param var_schema: Schema name (required) :type var_schema: str @@ -1117,7 +1117,7 @@ def delete_index_with_http_info( @validate_call def delete_index_without_preload_content( self, - connection_id: Annotated[StrictStr, Field(description="Connection identifier")], + connection_id: Annotated[StrictStr, Field(description="Connection ID")], var_schema: Annotated[StrictStr, Field(description="Schema name")], table: Annotated[StrictStr, Field(description="Table name")], index_name: Annotated[StrictStr, Field(description="Index name")], @@ -1138,7 +1138,7 @@ def delete_index_without_preload_content( Delete a specific index from a cached table. - :param connection_id: Connection identifier (required) + :param connection_id: Connection ID (required) :type connection_id: str :param var_schema: Schema name (required) :type var_schema: str @@ -1268,7 +1268,7 @@ def _delete_index_serialize( @validate_call def list_dataset_indexes( self, - dataset_id: Annotated[StrictStr, Field(description="Dataset identifier")], + dataset_id: Annotated[StrictStr, Field(description="Dataset ID")], _request_timeout: Union[ None, Annotated[StrictFloat, Field(gt=0)], @@ -1286,7 +1286,7 @@ def list_dataset_indexes( List all indexes created on a dataset. - :param dataset_id: Dataset identifier (required) + :param dataset_id: Dataset ID (required) :type dataset_id: str :param _request_timeout: timeout setting for this request. 
If one number provided, it will be total request @@ -1336,7 +1336,7 @@ def list_dataset_indexes( @validate_call def list_dataset_indexes_with_http_info( self, - dataset_id: Annotated[StrictStr, Field(description="Dataset identifier")], + dataset_id: Annotated[StrictStr, Field(description="Dataset ID")], _request_timeout: Union[ None, Annotated[StrictFloat, Field(gt=0)], @@ -1354,7 +1354,7 @@ def list_dataset_indexes_with_http_info( List all indexes created on a dataset. - :param dataset_id: Dataset identifier (required) + :param dataset_id: Dataset ID (required) :type dataset_id: str :param _request_timeout: timeout setting for this request. If one number provided, it will be total request @@ -1404,7 +1404,7 @@ def list_dataset_indexes_with_http_info( @validate_call def list_dataset_indexes_without_preload_content( self, - dataset_id: Annotated[StrictStr, Field(description="Dataset identifier")], + dataset_id: Annotated[StrictStr, Field(description="Dataset ID")], _request_timeout: Union[ None, Annotated[StrictFloat, Field(gt=0)], @@ -1422,7 +1422,7 @@ def list_dataset_indexes_without_preload_content( List all indexes created on a dataset. - :param dataset_id: Dataset identifier (required) + :param dataset_id: Dataset ID (required) :type dataset_id: str :param _request_timeout: timeout setting for this request. If one number provided, it will be total request @@ -1534,7 +1534,7 @@ def _list_dataset_indexes_serialize( @validate_call def list_indexes( self, - connection_id: Annotated[StrictStr, Field(description="Connection identifier")], + connection_id: Annotated[StrictStr, Field(description="Connection ID")], var_schema: Annotated[StrictStr, Field(description="Schema name")], table: Annotated[StrictStr, Field(description="Table name")], _request_timeout: Union[ @@ -1554,7 +1554,7 @@ def list_indexes( List all indexes created on a cached table. 
- :param connection_id: Connection identifier (required) + :param connection_id: Connection ID (required) :type connection_id: str :param var_schema: Schema name (required) :type var_schema: str @@ -1611,7 +1611,7 @@ def list_indexes( @validate_call def list_indexes_with_http_info( self, - connection_id: Annotated[StrictStr, Field(description="Connection identifier")], + connection_id: Annotated[StrictStr, Field(description="Connection ID")], var_schema: Annotated[StrictStr, Field(description="Schema name")], table: Annotated[StrictStr, Field(description="Table name")], _request_timeout: Union[ @@ -1631,7 +1631,7 @@ def list_indexes_with_http_info( List all indexes created on a cached table. - :param connection_id: Connection identifier (required) + :param connection_id: Connection ID (required) :type connection_id: str :param var_schema: Schema name (required) :type var_schema: str @@ -1688,7 +1688,7 @@ def list_indexes_with_http_info( @validate_call def list_indexes_without_preload_content( self, - connection_id: Annotated[StrictStr, Field(description="Connection identifier")], + connection_id: Annotated[StrictStr, Field(description="Connection ID")], var_schema: Annotated[StrictStr, Field(description="Schema name")], table: Annotated[StrictStr, Field(description="Table name")], _request_timeout: Union[ @@ -1708,7 +1708,7 @@ def list_indexes_without_preload_content( List all indexes created on a cached table. 
- :param connection_id: Connection identifier (required) + :param connection_id: Connection ID (required) :type connection_id: str :param var_schema: Schema name (required) :type var_schema: str diff --git a/hotdata/api/query_api.py b/hotdata/api/query_api.py index 481bfd5..85a1509 100644 --- a/hotdata/api/query_api.py +++ b/hotdata/api/query_api.py @@ -104,6 +104,7 @@ def query( '404': "ApiErrorResponse", '429': "ApiErrorResponse", '500': "ApiErrorResponse", + '503': "ApiErrorResponse", } response_data = self.api_client.call_api( *_param, @@ -180,6 +181,7 @@ def query_with_http_info( '404': "ApiErrorResponse", '429': "ApiErrorResponse", '500': "ApiErrorResponse", + '503': "ApiErrorResponse", } response_data = self.api_client.call_api( *_param, @@ -256,6 +258,7 @@ def query_without_preload_content( '404': "ApiErrorResponse", '429': "ApiErrorResponse", '500': "ApiErrorResponse", + '503': "ApiErrorResponse", } response_data = self.api_client.call_api( *_param, diff --git a/hotdata/api/results_api.py b/hotdata/api/results_api.py index 5915014..8974d9a 100644 --- a/hotdata/api/results_api.py +++ b/hotdata/api/results_api.py @@ -47,7 +47,7 @@ def get_result( id: Annotated[StrictStr, Field(description="Result ID")], offset: Annotated[Optional[Annotated[int, Field(strict=True, ge=0)]], Field(description="Rows to skip (default: 0)")] = None, limit: Annotated[Optional[Annotated[int, Field(strict=True, ge=0)]], Field(description="Maximum rows to return (default: unbounded)")] = None, - format: Annotated[Optional[ResultsFormatQuery], Field(description="`arrow`, `json`, `csv`, `md`, or `parquet` — overrides the `Accept` header. `markdown` is also accepted at runtime as an alias for `md`, but is not in the OpenAPI enum to keep the SDK shape clean.")] = None, + format: Annotated[Optional[ResultsFormatQuery], Field(description="`arrow`, `json`, `csv`, `md`, or `parquet` — overrides the `Accept` header. 
`markdown` is also accepted at runtime as an alias for `md`.")] = None, _request_timeout: Union[ None, Annotated[StrictFloat, Field(gt=0)], @@ -63,7 +63,7 @@ def get_result( ) -> GetResultResponse: """Get result - Retrieve a persisted query result by ID. The response format for the `ready` state is selected by `Accept` header or `?format=` query param; non-ready states use the same status codes and JSON body shape regardless of format. | Result status | Status × body | |-----------------------|------------------------------------------------------------------------------| | `ready` + JSON | 200 `application/json` — `GetResultResponse` with `columns`, `rows`, etc. | | `ready` + Arrow | 200 `application/vnd.apache.arrow.stream` — schema, RecordBatches, EOS | | `ready` + CSV | 200 `text/csv; charset=utf-8` — single header row, streamed batch-by-batch | | `ready` + Markdown | 200 `text/markdown; charset=utf-8` — GitHub-flavored pipe table, streamed | | `ready` + Parquet | 200 `application/vnd.apache.parquet` — raw parquet bytes (no conversion) | | `pending`/`processing`| 202 `application/json` `{status, result_id}` + `Retry-After` | | `failed` | 409 `application/json` `{status, result_id, error_message}` | | not found | 404 `application/json` (`ApiErrorResponse`) | `?format=` accepts `arrow`, `json`, `csv`, `md`, `parquet` and takes precedence over `Accept`. `markdown` is accepted as a runtime alias for `md` (the OpenAPI enum lists only `md` to keep the SDK shape clean). Use `?offset=N&limit=M` to slice the result; `offset` defaults to 0 and `limit` is unbounded by default. Both must be non-negative; invalid values return 400. When a finite `limit` doesn't reach the end of the result, a `Link` header with `rel=\"next\"` points at the following page. `?offset`/`?limit` are ignored for `format=parquet` since that path returns the underlying file unchanged. 
Ready responses (Arrow, CSV, Markdown, JSON) carry `X-Total-Row-Count` (full result row count from parquet metadata, independent of offset/limit). The streaming paths run end-to-end with no spawned task between the parquet reader and the wire — clients can disconnect at any time and the server stops reading. IEEE special floats (`±Inf`, `NaN`) have no canonical JSON representation. For cross-format consistency the JSON, CSV, and Markdown paths emit them as `null` / empty cells, and JSON `nullable[]` is widened to match. The Arrow IPC and Parquet bodies are binary round-trip formats and preserve the raw IEEE values; callers cross-checking a result across CSV and Parquet should not byte-compare those slots. + Retrieve a persisted query result by ID. The response format for the `ready` state is selected by `Accept` header or `?format=` query param; non-ready states use the same status codes and JSON body shape regardless of format. | Result status | Status × body | |-----------------------|------------------------------------------------------------------------------| | `ready` + JSON | 200 `application/json` — `GetResultResponse` with `columns`, `rows`, etc. | | `ready` + Arrow | 200 `application/vnd.apache.arrow.stream` — schema, RecordBatches, EOS | | `ready` + CSV | 200 `text/csv; charset=utf-8` — single header row, streamed batch-by-batch | | `ready` + Markdown | 200 `text/markdown; charset=utf-8` — GitHub-flavored pipe table, streamed | | `ready` + Parquet | 200 `application/vnd.apache.parquet` — raw parquet bytes (no conversion) | | `pending`/`processing`| 202 `application/json` `{status, result_id}` + `Retry-After` | | `failed` | 409 `application/json` `{status, result_id, error_message}` | | not found | 404 `application/json` (`ApiErrorResponse`) | `?format=` accepts `arrow`, `json`, `csv`, `md`, `parquet` and takes precedence over `Accept`. `markdown` is accepted as a runtime alias for `md`. 
Use `?offset=N&limit=M` to slice the result; `offset` defaults to 0 and `limit` is unbounded by default. Both must be non-negative; invalid values return 400. When a finite `limit` doesn't reach the end of the result, a `Link` header with `rel=\"next\"` points at the following page. `?offset`/`?limit` are ignored for `format=parquet` since that path returns the underlying file unchanged. Ready responses (Arrow, CSV, Markdown, JSON) carry `X-Total-Row-Count` (the full result row count, independent of offset/limit). Responses are streamed end-to-end, so a client can disconnect at any time and the server stops reading. IEEE special floats (`±Inf`, `NaN`) have no canonical JSON representation. For cross-format consistency the JSON, CSV, and Markdown paths emit them as `null` / empty cells, and JSON `nullable[]` is widened to match. The Arrow IPC and Parquet bodies are binary round-trip formats and preserve the raw IEEE values; callers cross-checking a result across CSV and Parquet should not byte-compare those slots. :param id: Result ID (required) :type id: str @@ -71,7 +71,7 @@ def get_result( :type offset: int :param limit: Maximum rows to return (default: unbounded) :type limit: int - :param format: `arrow`, `json`, `csv`, `md`, or `parquet` — overrides the `Accept` header. `markdown` is also accepted at runtime as an alias for `md`, but is not in the OpenAPI enum to keep the SDK shape clean. + :param format: `arrow`, `json`, `csv`, `md`, or `parquet` — overrides the `Accept` header. `markdown` is also accepted at runtime as an alias for `md`. :type format: ResultsFormatQuery :param _request_timeout: timeout setting for this request. 
If one number provided, it will be total request @@ -130,7 +130,7 @@ def get_result_with_http_info( id: Annotated[StrictStr, Field(description="Result ID")], offset: Annotated[Optional[Annotated[int, Field(strict=True, ge=0)]], Field(description="Rows to skip (default: 0)")] = None, limit: Annotated[Optional[Annotated[int, Field(strict=True, ge=0)]], Field(description="Maximum rows to return (default: unbounded)")] = None, - format: Annotated[Optional[ResultsFormatQuery], Field(description="`arrow`, `json`, `csv`, `md`, or `parquet` — overrides the `Accept` header. `markdown` is also accepted at runtime as an alias for `md`, but is not in the OpenAPI enum to keep the SDK shape clean.")] = None, + format: Annotated[Optional[ResultsFormatQuery], Field(description="`arrow`, `json`, `csv`, `md`, or `parquet` — overrides the `Accept` header. `markdown` is also accepted at runtime as an alias for `md`.")] = None, _request_timeout: Union[ None, Annotated[StrictFloat, Field(gt=0)], @@ -146,7 +146,7 @@ def get_result_with_http_info( ) -> ApiResponse[GetResultResponse]: """Get result - Retrieve a persisted query result by ID. The response format for the `ready` state is selected by `Accept` header or `?format=` query param; non-ready states use the same status codes and JSON body shape regardless of format. | Result status | Status × body | |-----------------------|------------------------------------------------------------------------------| | `ready` + JSON | 200 `application/json` — `GetResultResponse` with `columns`, `rows`, etc. 
| | `ready` + Arrow | 200 `application/vnd.apache.arrow.stream` — schema, RecordBatches, EOS | | `ready` + CSV | 200 `text/csv; charset=utf-8` — single header row, streamed batch-by-batch | | `ready` + Markdown | 200 `text/markdown; charset=utf-8` — GitHub-flavored pipe table, streamed | | `ready` + Parquet | 200 `application/vnd.apache.parquet` — raw parquet bytes (no conversion) | | `pending`/`processing`| 202 `application/json` `{status, result_id}` + `Retry-After` | | `failed` | 409 `application/json` `{status, result_id, error_message}` | | not found | 404 `application/json` (`ApiErrorResponse`) | `?format=` accepts `arrow`, `json`, `csv`, `md`, `parquet` and takes precedence over `Accept`. `markdown` is accepted as a runtime alias for `md` (the OpenAPI enum lists only `md` to keep the SDK shape clean). Use `?offset=N&limit=M` to slice the result; `offset` defaults to 0 and `limit` is unbounded by default. Both must be non-negative; invalid values return 400. When a finite `limit` doesn't reach the end of the result, a `Link` header with `rel=\"next\"` points at the following page. `?offset`/`?limit` are ignored for `format=parquet` since that path returns the underlying file unchanged. Ready responses (Arrow, CSV, Markdown, JSON) carry `X-Total-Row-Count` (full result row count from parquet metadata, independent of offset/limit). The streaming paths run end-to-end with no spawned task between the parquet reader and the wire — clients can disconnect at any time and the server stops reading. IEEE special floats (`±Inf`, `NaN`) have no canonical JSON representation. For cross-format consistency the JSON, CSV, and Markdown paths emit them as `null` / empty cells, and JSON `nullable[]` is widened to match. The Arrow IPC and Parquet bodies are binary round-trip formats and preserve the raw IEEE values; callers cross-checking a result across CSV and Parquet should not byte-compare those slots. + Retrieve a persisted query result by ID. 
The response format for the `ready` state is selected by `Accept` header or `?format=` query param; non-ready states use the same status codes and JSON body shape regardless of format. | Result status | Status × body | |-----------------------|------------------------------------------------------------------------------| | `ready` + JSON | 200 `application/json` — `GetResultResponse` with `columns`, `rows`, etc. | | `ready` + Arrow | 200 `application/vnd.apache.arrow.stream` — schema, RecordBatches, EOS | | `ready` + CSV | 200 `text/csv; charset=utf-8` — single header row, streamed batch-by-batch | | `ready` + Markdown | 200 `text/markdown; charset=utf-8` — GitHub-flavored pipe table, streamed | | `ready` + Parquet | 200 `application/vnd.apache.parquet` — raw parquet bytes (no conversion) | | `pending`/`processing`| 202 `application/json` `{status, result_id}` + `Retry-After` | | `failed` | 409 `application/json` `{status, result_id, error_message}` | | not found | 404 `application/json` (`ApiErrorResponse`) | `?format=` accepts `arrow`, `json`, `csv`, `md`, `parquet` and takes precedence over `Accept`. `markdown` is accepted as a runtime alias for `md`. Use `?offset=N&limit=M` to slice the result; `offset` defaults to 0 and `limit` is unbounded by default. Both must be non-negative; invalid values return 400. When a finite `limit` doesn't reach the end of the result, a `Link` header with `rel=\"next\"` points at the following page. `?offset`/`?limit` are ignored for `format=parquet` since that path returns the underlying file unchanged. Ready responses (Arrow, CSV, Markdown, JSON) carry `X-Total-Row-Count` (the full result row count, independent of offset/limit). Responses are streamed end-to-end, so a client can disconnect at any time and the server stops reading. IEEE special floats (`±Inf`, `NaN`) have no canonical JSON representation. 
For cross-format consistency the JSON, CSV, and Markdown paths emit them as `null` / empty cells, and JSON `nullable[]` is widened to match. The Arrow IPC and Parquet bodies are binary round-trip formats and preserve the raw IEEE values; callers cross-checking a result across CSV and Parquet should not byte-compare those slots. :param id: Result ID (required) :type id: str @@ -154,7 +154,7 @@ def get_result_with_http_info( :type offset: int :param limit: Maximum rows to return (default: unbounded) :type limit: int - :param format: `arrow`, `json`, `csv`, `md`, or `parquet` — overrides the `Accept` header. `markdown` is also accepted at runtime as an alias for `md`, but is not in the OpenAPI enum to keep the SDK shape clean. + :param format: `arrow`, `json`, `csv`, `md`, or `parquet` — overrides the `Accept` header. `markdown` is also accepted at runtime as an alias for `md`. :type format: ResultsFormatQuery :param _request_timeout: timeout setting for this request. If one number provided, it will be total request @@ -213,7 +213,7 @@ def get_result_without_preload_content( id: Annotated[StrictStr, Field(description="Result ID")], offset: Annotated[Optional[Annotated[int, Field(strict=True, ge=0)]], Field(description="Rows to skip (default: 0)")] = None, limit: Annotated[Optional[Annotated[int, Field(strict=True, ge=0)]], Field(description="Maximum rows to return (default: unbounded)")] = None, - format: Annotated[Optional[ResultsFormatQuery], Field(description="`arrow`, `json`, `csv`, `md`, or `parquet` — overrides the `Accept` header. `markdown` is also accepted at runtime as an alias for `md`, but is not in the OpenAPI enum to keep the SDK shape clean.")] = None, + format: Annotated[Optional[ResultsFormatQuery], Field(description="`arrow`, `json`, `csv`, `md`, or `parquet` — overrides the `Accept` header. 
`markdown` is also accepted at runtime as an alias for `md`.")] = None, _request_timeout: Union[ None, Annotated[StrictFloat, Field(gt=0)], @@ -229,7 +229,7 @@ def get_result_without_preload_content( ) -> RESTResponseType: """Get result - Retrieve a persisted query result by ID. The response format for the `ready` state is selected by `Accept` header or `?format=` query param; non-ready states use the same status codes and JSON body shape regardless of format. | Result status | Status × body | |-----------------------|------------------------------------------------------------------------------| | `ready` + JSON | 200 `application/json` — `GetResultResponse` with `columns`, `rows`, etc. | | `ready` + Arrow | 200 `application/vnd.apache.arrow.stream` — schema, RecordBatches, EOS | | `ready` + CSV | 200 `text/csv; charset=utf-8` — single header row, streamed batch-by-batch | | `ready` + Markdown | 200 `text/markdown; charset=utf-8` — GitHub-flavored pipe table, streamed | | `ready` + Parquet | 200 `application/vnd.apache.parquet` — raw parquet bytes (no conversion) | | `pending`/`processing`| 202 `application/json` `{status, result_id}` + `Retry-After` | | `failed` | 409 `application/json` `{status, result_id, error_message}` | | not found | 404 `application/json` (`ApiErrorResponse`) | `?format=` accepts `arrow`, `json`, `csv`, `md`, `parquet` and takes precedence over `Accept`. `markdown` is accepted as a runtime alias for `md` (the OpenAPI enum lists only `md` to keep the SDK shape clean). Use `?offset=N&limit=M` to slice the result; `offset` defaults to 0 and `limit` is unbounded by default. Both must be non-negative; invalid values return 400. When a finite `limit` doesn't reach the end of the result, a `Link` header with `rel=\"next\"` points at the following page. `?offset`/`?limit` are ignored for `format=parquet` since that path returns the underlying file unchanged. 
Ready responses (Arrow, CSV, Markdown, JSON) carry `X-Total-Row-Count` (full result row count from parquet metadata, independent of offset/limit). The streaming paths run end-to-end with no spawned task between the parquet reader and the wire — clients can disconnect at any time and the server stops reading. IEEE special floats (`±Inf`, `NaN`) have no canonical JSON representation. For cross-format consistency the JSON, CSV, and Markdown paths emit them as `null` / empty cells, and JSON `nullable[]` is widened to match. The Arrow IPC and Parquet bodies are binary round-trip formats and preserve the raw IEEE values; callers cross-checking a result across CSV and Parquet should not byte-compare those slots. + Retrieve a persisted query result by ID. The response format for the `ready` state is selected by `Accept` header or `?format=` query param; non-ready states use the same status codes and JSON body shape regardless of format. | Result status | Status × body | |-----------------------|------------------------------------------------------------------------------| | `ready` + JSON | 200 `application/json` — `GetResultResponse` with `columns`, `rows`, etc. | | `ready` + Arrow | 200 `application/vnd.apache.arrow.stream` — schema, RecordBatches, EOS | | `ready` + CSV | 200 `text/csv; charset=utf-8` — single header row, streamed batch-by-batch | | `ready` + Markdown | 200 `text/markdown; charset=utf-8` — GitHub-flavored pipe table, streamed | | `ready` + Parquet | 200 `application/vnd.apache.parquet` — raw parquet bytes (no conversion) | | `pending`/`processing`| 202 `application/json` `{status, result_id}` + `Retry-After` | | `failed` | 409 `application/json` `{status, result_id, error_message}` | | not found | 404 `application/json` (`ApiErrorResponse`) | `?format=` accepts `arrow`, `json`, `csv`, `md`, `parquet` and takes precedence over `Accept`. `markdown` is accepted as a runtime alias for `md`. 
Use `?offset=N&limit=M` to slice the result; `offset` defaults to 0 and `limit` is unbounded by default. Both must be non-negative; invalid values return 400. When a finite `limit` doesn't reach the end of the result, a `Link` header with `rel=\"next\"` points at the following page. `?offset`/`?limit` are ignored for `format=parquet` since that path returns the underlying file unchanged. Ready responses (Arrow, CSV, Markdown, JSON) carry `X-Total-Row-Count` (the full result row count, independent of offset/limit). Responses are streamed end-to-end, so a client can disconnect at any time and the server stops reading. IEEE special floats (`±Inf`, `NaN`) have no canonical JSON representation. For cross-format consistency the JSON, CSV, and Markdown paths emit them as `null` / empty cells, and JSON `nullable[]` is widened to match. The Arrow IPC and Parquet bodies are binary round-trip formats and preserve the raw IEEE values; callers cross-checking a result across CSV and Parquet should not byte-compare those slots. :param id: Result ID (required) :type id: str @@ -237,7 +237,7 @@ def get_result_without_preload_content( :type offset: int :param limit: Maximum rows to return (default: unbounded) :type limit: int - :param format: `arrow`, `json`, `csv`, `md`, or `parquet` — overrides the `Accept` header. `markdown` is also accepted at runtime as an alias for `md`, but is not in the OpenAPI enum to keep the SDK shape clean. + :param format: `arrow`, `json`, `csv`, `md`, or `parquet` — overrides the `Accept` header. `markdown` is also accepted at runtime as an alias for `md`. :type format: ResultsFormatQuery :param _request_timeout: timeout setting for this request. 
If one number provided, it will be total request diff --git a/hotdata/api_client.py b/hotdata/api_client.py index 465fa0b..ecbadea 100644 --- a/hotdata/api_client.py +++ b/hotdata/api_client.py @@ -91,7 +91,7 @@ def __init__( self.default_headers[header_name] = header_value self.cookie = cookie # Set default User-Agent. - self.user_agent = 'OpenAPI-Generator/0.3.4/python' + self.user_agent = 'OpenAPI-Generator/0.4.1/python' self.client_side_validation = configuration.client_side_validation def __enter__(self): diff --git a/hotdata/models/create_connection_request.py b/hotdata/models/create_connection_request.py index 6044185..8ec308a 100644 --- a/hotdata/models/create_connection_request.py +++ b/hotdata/models/create_connection_request.py @@ -33,8 +33,7 @@ class CreateConnectionRequest(BaseModel): secret_name: Optional[StrictStr] = Field(default=None, description="Optional reference to a secret by name. If provided, this secret will be used for authentication. Mutually exclusive with `secret_id`.") skip_discovery: Optional[StrictBool] = Field(default=None, description="If true, skip automatic schema discovery after registering the connection. The connection will be created but no tables will be discovered. You can run discovery later via the refresh endpoint.") source_type: StrictStr - storage_backend: Optional[StrictStr] = Field(default=None, description="Physical storage backend for tables created under this connection. `\"parquet\"` (default) uses the versioned parquet cache. 
`\"ducklake\"` stores data in a DuckLake catalog in the shared metadata DB configured via `ducklake.metadata_pg_url`; accepted for any source type and requires that pool to be configured.") - __properties: ClassVar[List[str]] = ["config", "name", "secret_id", "secret_name", "skip_discovery", "source_type", "storage_backend"] + __properties: ClassVar[List[str]] = ["config", "name", "secret_id", "secret_name", "skip_discovery", "source_type"] model_config = ConfigDict( populate_by_name=True, @@ -85,11 +84,6 @@ def to_dict(self) -> Dict[str, Any]: if self.secret_name is None and "secret_name" in self.model_fields_set: _dict['secret_name'] = None - # set to None if storage_backend (nullable) is None - # and model_fields_set contains the field - if self.storage_backend is None and "storage_backend" in self.model_fields_set: - _dict['storage_backend'] = None - return _dict @classmethod @@ -107,8 +101,7 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: "secret_id": obj.get("secret_id"), "secret_name": obj.get("secret_name"), "skip_discovery": obj.get("skip_discovery"), - "source_type": obj.get("source_type"), - "storage_backend": obj.get("storage_backend") + "source_type": obj.get("source_type") }) return _obj diff --git a/hotdata/models/create_database_request.py b/hotdata/models/create_database_request.py index ed64625..73e77f5 100644 --- a/hotdata/models/create_database_request.py +++ b/hotdata/models/create_database_request.py @@ -31,9 +31,8 @@ class CreateDatabaseRequest(BaseModel): default_catalog: Optional[StrictStr] = Field(default=None, description="Optional name the database's auto-created default catalog answers to inside its query scope. Must be a valid SQL identifier (`[a-z0-9_]`, not starting with a digit) and may not collide with the system catalogs `hotdata`, `datasets`, or `information_schema`. Defaults to `default` when omitted, so `default.main.
` keeps working.") expires_at: Optional[StrictStr] = Field(default=None, description="When this database expires. Accepts either an RFC 3339 timestamp (e.g. `\"2026-06-01T00:00:00Z\"`) or a relative duration suffixed with `h` (hours), `m` (minutes), or `d` (days) — for example `\"24h\"`, `\"48h\"`, or `\"7d\"`. Omitted (or empty) means the database never expires. Expiry is best-effort: the database will not be deleted before `expires_at`, but cleanup may run later than the exact timestamp.") name: Optional[StrictStr] = Field(default=None, description="Optional free-form display label (for UIs/CLIs). Not unique. Not an identifier — databases are always addressed by `id`. Accepts the legacy `description` key as an alias so clients that predate the rename keep populating this field.") - schemas: Optional[List[DatabaseDefaultSchemaDecl]] = Field(default=None, description="Optional schemas/tables to declare on the database's auto-created default catalog. Mirrors the `config.schemas` field of a managed `POST /v1/connections`. Tables declared here can be loaded via the standard managed-table load endpoint targeting `default_connection_id`. Omitted or empty means the default catalog starts empty.") - storage_backend: Optional[StrictStr] = Field(default=None, description="Physical storage backend for the database's auto-created `default` catalog. `\"parquet\"` (default) uses the versioned parquet cache. `\"ducklake\"` stores data in a DuckLake catalog in the shared metadata DB configured via `ducklake.metadata_pg_url`, which must be configured for that value to be accepted. Omitted means `\"parquet\"`.") - __properties: ClassVar[List[str]] = ["default_catalog", "expires_at", "name", "schemas", "storage_backend"] + schemas: Optional[List[DatabaseDefaultSchemaDecl]] = Field(default=None, description="Optional schemas/tables to declare on the database's auto-created default catalog. 
Tables declared here can be loaded via the standard managed-table load endpoint targeting `default_connection_id`. Omitted or empty means the default catalog starts empty.") + __properties: ClassVar[List[str]] = ["default_catalog", "expires_at", "name", "schemas"] model_config = ConfigDict( populate_by_name=True, @@ -96,11 +95,6 @@ def to_dict(self) -> Dict[str, Any]: if self.name is None and "name" in self.model_fields_set: _dict['name'] = None - # set to None if storage_backend (nullable) is None - # and model_fields_set contains the field - if self.storage_backend is None and "storage_backend" in self.model_fields_set: - _dict['storage_backend'] = None - return _dict @classmethod @@ -116,8 +110,7 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: "default_catalog": obj.get("default_catalog"), "expires_at": obj.get("expires_at"), "name": obj.get("name"), - "schemas": [DatabaseDefaultSchemaDecl.from_dict(_item) for _item in obj["schemas"]] if obj.get("schemas") is not None else None, - "storage_backend": obj.get("storage_backend") + "schemas": [DatabaseDefaultSchemaDecl.from_dict(_item) for _item in obj["schemas"]] if obj.get("schemas") is not None else None }) return _obj diff --git a/hotdata/models/create_dataset_request.py b/hotdata/models/create_dataset_request.py index babc79c..f70e5ee 100644 --- a/hotdata/models/create_dataset_request.py +++ b/hotdata/models/create_dataset_request.py @@ -30,9 +30,8 @@ class CreateDatasetRequest(BaseModel): """ # noqa: E501 label: StrictStr source: DatasetSource - storage_backend: Optional[StrictStr] = Field(default=None, description="Optional storage backend: `\"parquet\"` (default) or `\"ducklake\"`. 
`\"ducklake\"` requires `ducklake.metadata_pg_url` to be configured at engine boot; the engine also rejects the combo of `storage_backend: \"ducklake\"` with a saved-query source or with explicit geometry columns (both deferred to a follow-up).") table_name: Optional[StrictStr] = Field(default=None, description="Optional table_name - if not provided, derived from label") - __properties: ClassVar[List[str]] = ["label", "source", "storage_backend", "table_name"] + __properties: ClassVar[List[str]] = ["label", "source", "table_name"] model_config = ConfigDict( populate_by_name=True, @@ -76,11 +75,6 @@ def to_dict(self) -> Dict[str, Any]: # override the default output from pydantic by calling `to_dict()` of source if self.source: _dict['source'] = self.source.to_dict() - # set to None if storage_backend (nullable) is None - # and model_fields_set contains the field - if self.storage_backend is None and "storage_backend" in self.model_fields_set: - _dict['storage_backend'] = None - # set to None if table_name (nullable) is None # and model_fields_set contains the field if self.table_name is None and "table_name" in self.model_fields_set: @@ -100,7 +94,6 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: _obj = cls.model_validate({ "label": obj.get("label"), "source": DatasetSource.from_dict(obj["source"]) if obj.get("source") is not None else None, - "storage_backend": obj.get("storage_backend"), "table_name": obj.get("table_name") }) return _obj diff --git a/hotdata/models/database_default_schema_decl.py b/hotdata/models/database_default_schema_decl.py index 4d5f6f8..f55c374 100644 --- a/hotdata/models/database_default_schema_decl.py +++ b/hotdata/models/database_default_schema_decl.py @@ -26,7 +26,7 @@ class DatabaseDefaultSchemaDecl(BaseModel): """ - One schema declaration inside the database's default catalog at create time. Mirrors `crate::source::ManagedSchemaDecl`. 
Tables default to empty so callers can declare just a schema name and add tables later via the managed-tables API on the default connection. + One schema declaration inside the database's default catalog, supplied at create time. `tables` defaults to empty, so you can declare just a schema name and add tables later. """ # noqa: E501 name: StrictStr tables: Optional[List[DatabaseDefaultTableDecl]] = None diff --git a/hotdata/models/database_default_table_decl.py b/hotdata/models/database_default_table_decl.py index 5742d9d..8dc5dec 100644 --- a/hotdata/models/database_default_table_decl.py +++ b/hotdata/models/database_default_table_decl.py @@ -25,7 +25,7 @@ class DatabaseDefaultTableDecl(BaseModel): """ - One table declaration inside a default-catalog schema at database-create time. Mirrors `crate::source::ManagedTableDecl` shape so the controller can convert with a simple `.map`. + One table declaration inside a default-catalog schema, supplied at database-create time. """ # noqa: E501 name: StrictStr __properties: ClassVar[List[str]] = ["name"] diff --git a/hotdata/models/get_result_response.py b/hotdata/models/get_result_response.py index d1b6b12..becbedb 100644 --- a/hotdata/models/get_result_response.py +++ b/hotdata/models/get_result_response.py @@ -18,9 +18,8 @@ import re # noqa: F401 import json -from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictStr +from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictInt, StrictStr from typing import Any, ClassVar, Dict, List, Optional -from typing_extensions import Annotated from typing import Optional, Set from typing_extensions import Self @@ -32,7 +31,7 @@ class GetResultResponse(BaseModel): error_message: Optional[StrictStr] = None nullable: Optional[List[StrictBool]] = None result_id: StrictStr - row_count: Optional[Annotated[int, Field(strict=True, ge=0)]] = None + row_count: Optional[StrictInt] = None rows: Optional[List[List[Any]]] = Field(default=None, description="Array of 
rows, where each row is an array of column values.") status: StrictStr __properties: ClassVar[List[str]] = ["columns", "error_message", "nullable", "result_id", "row_count", "rows", "status"] diff --git a/hotdata/models/job_type.py b/hotdata/models/job_type.py index 3db4324..2c51146 100644 --- a/hotdata/models/job_type.py +++ b/hotdata/models/job_type.py @@ -33,6 +33,7 @@ class JobType(str, Enum): DATASET_REFRESH = 'dataset_refresh' CREATE_INDEX = 'create_index' CREATE_DATASET_INDEX = 'create_dataset_index' + MANAGED_LOAD = 'managed_load' @classmethod def from_json(cls, json_str: str) -> Self: diff --git a/hotdata/models/load_managed_table_request.py b/hotdata/models/load_managed_table_request.py index 4e6461c..3313934 100644 --- a/hotdata/models/load_managed_table_request.py +++ b/hotdata/models/load_managed_table_request.py @@ -25,7 +25,7 @@ class LoadManagedTableRequest(BaseModel): """ - Request body for `POST /v1/connections/{connection_id}/schemas/{schema}/tables/{table}/loads`. Publishes a previously-uploaded parquet file as the new generation for the named managed table. `mode` is fixed to `\"replace\"` today; the field is kept in the request body so future modes (e.g. append) are an additive change. + Request body for `POST /v1/connections/{connection_id}/schemas/{schema}/tables/{table}/loads`. Publishes a previously-uploaded parquet file as the new contents of the named managed table. `mode` is fixed to `\"replace\"` today; the field is kept in the request body so future modes (e.g. append) are an additive change. """ # noqa: E501 mode: StrictStr = Field(description="Load mode. Only `\"replace\"` is supported in this release.") upload_id: StrictStr = Field(description="ID of a previously-staged upload (see `POST /v1/files`). The upload must reference a parquet file. 
The upload is claimed atomically; concurrent loads against the same `upload_id` return 409.") diff --git a/hotdata/models/load_managed_table_response.py b/hotdata/models/load_managed_table_response.py index 0d7d58f..04c0e82 100644 --- a/hotdata/models/load_managed_table_response.py +++ b/hotdata/models/load_managed_table_response.py @@ -28,7 +28,7 @@ class LoadManagedTableResponse(BaseModel): """ Response body for `POST /v1/connections/{connection_id}/schemas/{schema}/tables/{table}/loads`. """ # noqa: E501 - arrow_schema_json: StrictStr = Field(description="Arrow schema (JSON) parsed from the uploaded parquet footer.") + arrow_schema_json: StrictStr = Field(description="Schema of the loaded table, as JSON.") connection_id: StrictStr row_count: Annotated[int, Field(strict=True, ge=0)] = Field(description="Total rows in the published parquet file.") schema_name: StrictStr diff --git a/hotdata/models/managed_schema_response.py b/hotdata/models/managed_schema_response.py index 9449067..930175a 100644 --- a/hotdata/models/managed_schema_response.py +++ b/hotdata/models/managed_schema_response.py @@ -25,7 +25,7 @@ class ManagedSchemaResponse(BaseModel): """ - Response body for a successful add-schema request. Echoes the normalised (lowercased) names so callers see exactly what was persisted. + Response body for a successful add-schema request. Echoes the normalized (lowercased) names so callers see exactly what was persisted. """ # noqa: E501 connection_id: StrictStr = Field(description="Connection backing the catalog the schema was added to. 
For a database default catalog this is the database's `default_connection_id`.") var_schema: StrictStr = Field(alias="schema") diff --git a/hotdata/models/query_response.py b/hotdata/models/query_response.py index 429740a..f08ab1b 100644 --- a/hotdata/models/query_response.py +++ b/hotdata/models/query_response.py @@ -31,14 +31,14 @@ class QueryResponse(BaseModel): columns: List[StrictStr] execution_time_ms: Annotated[int, Field(strict=True, ge=0)] nullable: List[StrictBool] = Field(description="Nullable flags for each column (parallel to columns vec). True if the column allows NULL values, false if NOT NULL.") - preview_row_count: StrictInt = Field(description="Number of rows in *this* response body (`rows.len()`). Always present. For a large result this is a bounded preview, not the grand total — see `total_row_count` and `truncated` (#640).") + preview_row_count: StrictInt = Field(description="Number of rows in *this* response body. Always present. For a large result this is a bounded preview, not the grand total — see `total_row_count` and `truncated`.") query_run_id: StrictStr = Field(description="Unique identifier for the query run record (qrun...).") - result_id: Optional[StrictStr] = Field(default=None, description="Unique identifier for retrieving this result via GET /results/{id}. Null if catalog registration failed (see `warning` field for details). When non-null, the result is being persisted asynchronously.") - row_count: Annotated[int, Field(strict=True, ge=0)] = Field(description="**Deprecated** — use `preview_row_count` (rows in this body) and `total_row_count` (grand total) instead. Retained for backward compatibility and currently always equal to `preview_row_count`; it will be removed in a future release once clients migrate to the count fields below (#640).") + result_id: Optional[StrictStr] = Field(default=None, description="Unique identifier for retrieving this result via GET /results/{id}. 
When non-null, the result is being persisted asynchronously. Null only when the result fit entirely in this response (`truncated: false`) but could not be persisted for later retrieval — see the `warning` field. A `truncated: true` response ALWAYS carries a non-null, resolvable `result_id` (#640 F1): a truncated result that cannot be persisted fails the request with a retryable HTTP 503 (`PERSISTENCE_UNAVAILABLE`, with a `Retry-After` header) rather than returning a partial body with a dead ticket.") + row_count: Annotated[int, Field(strict=True, ge=0)] = Field(description="**Deprecated** — use `preview_row_count` (rows in this body) and `total_row_count` (grand total) instead. Retained as a back-compat alias and always equal to `preview_row_count`; for a truncated result it is the preview count, *not* the grand total — read `total_row_count` for that. Will be removed in a future release once clients migrate.") rows: List[List[Any]] = Field(description="Array of rows, where each row is an array of column values. Values can be strings, numbers, booleans, or null.") - total_row_count: Optional[StrictInt] = Field(default=None, description="Grand total rows in the full result. Present (and equal to `preview_row_count`) when the whole result fit in this response; `null` while a truncated result is still being persisted. When `null`, read the authoritative total from `GET /v1/query-runs/{id}` (`row_count`) or the `X-Total-Row-Count` header on `GET /v1/results/{id}` (#640).") - truncated: StrictBool = Field(description="True when `rows` is a bounded preview of a larger result. Fetch the full result via `result_id` (#640). Always `false` until bounded streaming is enabled; clients should still branch on it so no code change is needed when truncation goes live.") - warning: Optional[StrictStr] = Field(default=None, description="Warning message if result persistence could not be initiated. When present, `result_id` will be null and the result cannot be retrieved later. 
The query results are still returned in this response.") + total_row_count: Optional[StrictInt] = Field(default=None, description="Grand total rows in the full result. Present (and equal to `preview_row_count`) when the whole result fit in this response; `null` while a truncated result is still being persisted. When `null`, read the authoritative total from `GET /v1/query-runs/{id}` (`row_count`) or the `X-Total-Row-Count` header on `GET /v1/results/{id}`.") + truncated: StrictBool = Field(description="True when `rows` is a bounded preview of a larger result. Fetch the full result via `result_id`.") + warning: Optional[StrictStr] = Field(default=None, description="Warning message if result persistence could not be initiated. Present only when the full result is returned inline (`truncated: false`) but could not be persisted: `result_id` is then null and the result cannot be re-fetched later, though every row is in this response. A truncated result never carries a warning — if it cannot be persisted the request fails with a retryable HTTP 503 (`PERSISTENCE_UNAVAILABLE`, with a `Retry-After` header) instead (#640 F1).") __properties: ClassVar[List[str]] = ["columns", "execution_time_ms", "nullable", "preview_row_count", "query_run_id", "result_id", "row_count", "rows", "total_row_count", "truncated", "warning"] model_config = ConfigDict( diff --git a/hotdata/models/query_run_info.py b/hotdata/models/query_run_info.py index 327c36c..cf9aa76 100644 --- a/hotdata/models/query_run_info.py +++ b/hotdata/models/query_run_info.py @@ -28,6 +28,7 @@ class QueryRunInfo(BaseModel): """ Single query run for listing """ # noqa: E501 + bytes_scanned: Optional[StrictInt] = Field(default=None, description="Total bytes of table data read from storage to run this query. `null` when the query touches no table at all (for example a constant expression like `SELECT 1`). 
May be `0` when the query reads a table but not its row data — for example a row count served from table statistics.") completed_at: Optional[datetime] = None created_at: datetime error_message: Optional[StrictStr] = None @@ -35,6 +36,7 @@ class QueryRunInfo(BaseModel): id: StrictStr result_id: Optional[StrictStr] = None row_count: Optional[StrictInt] = None + rows_scanned: Optional[StrictInt] = Field(default=None, description="Total rows read from storage to run this query, before any filtering or aggregation. Distinct from `row_count`, which is how many rows the query returned. `null` when the query reads no table data from storage.") saved_query_id: Optional[StrictStr] = None saved_query_version: Optional[StrictInt] = None server_processing_ms: Optional[StrictInt] = Field(default=None, description="Total server-side processing time for this query (milliseconds). Measured from query start to result ready. Includes SQL execution, task spawning, and result preparation. Does not include network transit. Populated for all completed query runs (sync and async).") @@ -43,9 +45,9 @@ class QueryRunInfo(BaseModel): sql_text: StrictStr status: StrictStr trace_id: Optional[StrictStr] = None - user_public_id: Optional[StrictStr] = Field(default=None, description="Caller identity derived from the Authorization Bearer token (SHA-256 hash). Format: `user_{first_10_hex_chars}`. Mirrors the webapp's `user_public_id_from_auth_header`.") + user_public_id: Optional[StrictStr] = Field(default=None, description="Caller identity derived from the Authorization Bearer token (SHA-256 hash). 
Format: `user_{first_10_hex_chars}`.") warning_message: Optional[StrictStr] = None - __properties: ClassVar[List[str]] = ["completed_at", "created_at", "error_message", "execution_time_ms", "id", "result_id", "row_count", "saved_query_id", "saved_query_version", "server_processing_ms", "snapshot_id", "sql_hash", "sql_text", "status", "trace_id", "user_public_id", "warning_message"] + __properties: ClassVar[List[str]] = ["bytes_scanned", "completed_at", "created_at", "error_message", "execution_time_ms", "id", "result_id", "row_count", "rows_scanned", "saved_query_id", "saved_query_version", "server_processing_ms", "snapshot_id", "sql_hash", "sql_text", "status", "trace_id", "user_public_id", "warning_message"] model_config = ConfigDict( populate_by_name=True, @@ -86,6 +88,11 @@ def to_dict(self) -> Dict[str, Any]: exclude=excluded_fields, exclude_none=True, ) + # set to None if bytes_scanned (nullable) is None + # and model_fields_set contains the field + if self.bytes_scanned is None and "bytes_scanned" in self.model_fields_set: + _dict['bytes_scanned'] = None + # set to None if completed_at (nullable) is None # and model_fields_set contains the field if self.completed_at is None and "completed_at" in self.model_fields_set: @@ -111,6 +118,11 @@ def to_dict(self) -> Dict[str, Any]: if self.row_count is None and "row_count" in self.model_fields_set: _dict['row_count'] = None + # set to None if rows_scanned (nullable) is None + # and model_fields_set contains the field + if self.rows_scanned is None and "rows_scanned" in self.model_fields_set: + _dict['rows_scanned'] = None + # set to None if saved_query_id (nullable) is None # and model_fields_set contains the field if self.saved_query_id is None and "saved_query_id" in self.model_fields_set: @@ -153,6 +165,7 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: return cls.model_validate(obj) _obj = cls.model_validate({ + "bytes_scanned": obj.get("bytes_scanned"), "completed_at": 
obj.get("completed_at"), "created_at": obj.get("created_at"), "error_message": obj.get("error_message"), @@ -160,6 +173,7 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: "id": obj.get("id"), "result_id": obj.get("result_id"), "row_count": obj.get("row_count"), + "rows_scanned": obj.get("rows_scanned"), "saved_query_id": obj.get("saved_query_id"), "saved_query_version": obj.get("saved_query_version"), "server_processing_ms": obj.get("server_processing_ms"), diff --git a/hotdata/models/results_format_query.py b/hotdata/models/results_format_query.py index 8733a26..335e518 100644 --- a/hotdata/models/results_format_query.py +++ b/hotdata/models/results_format_query.py @@ -21,7 +21,7 @@ class ResultsFormatQuery(str, Enum): """ - Schema for the `?format=` query parameter on `GET /v1/results/{id}`. Documents the canonical values that SDKs should treat as a closed set (`arrow`, `json`, `csv`, `md`, `parquet`). The runtime handler's negotiator (`negotiate_results_format`) additionally accepts `markdown` as an alias for `md` — case-insensitive, with unknown values falling through to the `Accept` header — but `markdown` is intentionally NOT listed in this enum so SDK generators emit a single canonical `Markdown` (or equivalent) variant rather than two distinct ones for the same logical format. + The `?format=` query parameter on `GET /v1/results/{id}`. One of `arrow`, `json`, `csv`, `md`, or `parquet`. 
""" """ diff --git a/pyproject.toml b/pyproject.toml index 6536523..ff40c21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "hotdata" -version = "0.4.0" +version = "0.4.1" description = "Hotdata API" authors = [ {name = "Hotdata",email = "developers@hotdata.dev"}, diff --git a/test/test_create_connection_request.py b/test/test_create_connection_request.py index f7198d9..09c306e 100644 --- a/test/test_create_connection_request.py +++ b/test/test_create_connection_request.py @@ -43,8 +43,7 @@ def make_instance(self, include_optional) -> CreateConnectionRequest: secret_id = '', secret_name = '', skip_discovery = True, - source_type = '', - storage_backend = '' + source_type = '' ) else: return CreateConnectionRequest( diff --git a/test/test_create_database_request.py b/test/test_create_database_request.py index cdd16ea..0fa1aac 100644 --- a/test/test_create_database_request.py +++ b/test/test_create_database_request.py @@ -46,8 +46,7 @@ def make_instance(self, include_optional) -> CreateDatabaseRequest: hotdata.models.database_default_table_decl.DatabaseDefaultTableDecl( name = '', ) ], ) - ], - storage_backend = '' + ] ) else: return CreateDatabaseRequest( diff --git a/test/test_create_dataset_request.py b/test/test_create_dataset_request.py index 0687d88..4c639ac 100644 --- a/test/test_create_dataset_request.py +++ b/test/test_create_dataset_request.py @@ -38,7 +38,6 @@ def make_instance(self, include_optional) -> CreateDatasetRequest: return CreateDatasetRequest( label = '', source = None, - storage_backend = '', table_name = '' ) else: diff --git a/test/test_get_result_response.py b/test/test_get_result_response.py index fb35b1c..c40f3cc 100644 --- a/test/test_get_result_response.py +++ b/test/test_get_result_response.py @@ -44,7 +44,7 @@ def make_instance(self, include_optional) -> GetResultResponse: True ], result_id = '', - row_count = 0, + row_count = 56, rows = [ [ null diff --git a/test/test_list_query_runs_response.py 
b/test/test_list_query_runs_response.py index ee78d90..5580b42 100644 --- a/test/test_list_query_runs_response.py +++ b/test/test_list_query_runs_response.py @@ -42,6 +42,7 @@ def make_instance(self, include_optional) -> ListQueryRunsResponse: next_cursor = '', query_runs = [ hotdata.models.query_run_info.QueryRunInfo( + bytes_scanned = 56, completed_at = datetime.datetime.strptime('2013-10-20 19:20:30.00', '%Y-%m-%d %H:%M:%S.%f'), created_at = datetime.datetime.strptime('2013-10-20 19:20:30.00', '%Y-%m-%d %H:%M:%S.%f'), error_message = '', @@ -49,6 +50,7 @@ def make_instance(self, include_optional) -> ListQueryRunsResponse: id = '', result_id = '', row_count = 56, + rows_scanned = 56, saved_query_id = '', saved_query_version = 56, server_processing_ms = 56, @@ -68,6 +70,7 @@ def make_instance(self, include_optional) -> ListQueryRunsResponse: limit = 0, query_runs = [ hotdata.models.query_run_info.QueryRunInfo( + bytes_scanned = 56, completed_at = datetime.datetime.strptime('2013-10-20 19:20:30.00', '%Y-%m-%d %H:%M:%S.%f'), created_at = datetime.datetime.strptime('2013-10-20 19:20:30.00', '%Y-%m-%d %H:%M:%S.%f'), error_message = '', @@ -75,6 +78,7 @@ def make_instance(self, include_optional) -> ListQueryRunsResponse: id = '', result_id = '', row_count = 56, + rows_scanned = 56, saved_query_id = '', saved_query_version = 56, server_processing_ms = 56, diff --git a/test/test_query_run_info.py b/test/test_query_run_info.py index da3a982..b64a682 100644 --- a/test/test_query_run_info.py +++ b/test/test_query_run_info.py @@ -36,6 +36,7 @@ def make_instance(self, include_optional) -> QueryRunInfo: model = QueryRunInfo() if include_optional: return QueryRunInfo( + bytes_scanned = 56, completed_at = datetime.datetime.strptime('2013-10-20 19:20:30.00', '%Y-%m-%d %H:%M:%S.%f'), created_at = datetime.datetime.strptime('2013-10-20 19:20:30.00', '%Y-%m-%d %H:%M:%S.%f'), error_message = '', @@ -43,6 +44,7 @@ def make_instance(self, include_optional) -> QueryRunInfo: id = '', 
result_id = '', row_count = 56, + rows_scanned = 56, saved_query_id = '', saved_query_version = 56, server_processing_ms = 56,