Skip to content

Commit b0bfc13

Browse files
authored
feat(queries): Add upload url for datasets and new queries commands (#23)
1 parent ac9a9c7 commit b0bfc13

File tree

10 files changed

+866
-18
lines changed

10 files changed

+866
-18
lines changed

Cargo.lock

Lines changed: 26 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ nix = { version = "0.29", features = ["fs"] }
3434
flate2 = "1"
3535
tar = "0.4"
3636
semver = "1"
37+
sqlformat = "0.5.0"
3738

3839
[package.metadata.release]
3940
pre-release-hook = ["git", "cliff", "-o", "CHANGELOG.md", "--tag", "{{version}}" ]

README.md

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ API key priority (lowest to highest): config file → `HOTDATA_API_KEY` env var
5656
| `tables` | `list` | List tables and columns |
5757
| `datasets` | `list`, `create` | Manage uploaded datasets |
5858
| `query` | | Execute a SQL query |
59+
| `queries` | `list`, `create`, `update`, `run` | Manage saved queries |
60+
| `search` | | Full-text search across a table column |
61+
| `indexes` | `list`, `create` | Manage indexes on a table |
5962
| `results` | `list` | Retrieve stored query results |
6063
| `jobs` | `list` | Manage background jobs |
6164
| `skills` | `install`, `status` | Manage the hotdata-cli agent skill |
@@ -123,10 +126,12 @@ hotdata datasets list [--workspace-id <id>] [--limit <n>] [--offset <n>] [--form
123126
hotdata datasets <dataset_id> [--workspace-id <id>] [--format table|json|yaml]
124127
hotdata datasets create --file data.csv [--label "My Dataset"] [--table-name my_dataset]
125128
hotdata datasets create --sql "SELECT ..." --label "My Dataset"
129+
hotdata datasets create --url "https://example.com/data.parquet" --label "My Dataset"
126130
```
127131

128132
- Datasets are queryable as `datasets.main.<table_name>`.
129-
- `--file`, `--sql`, and `--query-id` are mutually exclusive.
133+
- `--file`, `--sql`, `--query-id`, and `--url` are mutually exclusive.
134+
- `--url` imports data directly from a URL (supports csv, json, parquet).
130135
- Format is auto-detected from file extension or content.
131136
- Piped stdin is supported: `cat data.csv | hotdata datasets create --label "My Dataset"`
132137

@@ -139,6 +144,44 @@ hotdata query "<sql>" [--workspace-id <id>] [--connection <connection_id>] [--fo
139144
- Default format is `table`, which prints results with row count and execution time.
140145
- Use `--connection` to scope the query to a specific connection.
141146

147+
## Saved Queries
148+
149+
```sh
150+
hotdata queries list [--limit <n>] [--offset <n>] [--format table|json|yaml]
151+
hotdata queries <query_id> [--format table|json|yaml]
152+
hotdata queries create --name "My Query" --sql "SELECT ..." [--description "..."] [--tags "tag1,tag2"] [--format table|json|yaml]
153+
hotdata queries update <query_id> [--name "New Name"] [--sql "SELECT ..."] [--description "..."] [--tags "tag1,tag2"] [--category "..."] [--table-size "..."] [--format table|json|yaml]
154+
hotdata queries run <query_id> [--format table|json|csv]
155+
```
156+
157+
- `list` shows saved queries with name, description, tags, and version.
158+
- View a query by ID to see its formatted and syntax-highlighted SQL.
159+
- `create` requires `--name` and `--sql`. Tags are comma-separated.
160+
- `update` accepts any combination of fields to change.
161+
- `run` executes a saved query and displays results like the `query` command.
162+
163+
## Search
164+
165+
```sh
166+
hotdata search "<query>" --table <connection.schema.table> --column <column> [--select <columns>] [--limit <n>] [--format table|json|csv]
167+
```
168+
169+
- Full-text search using BM25 across a table column.
170+
- Requires a BM25 index on the target column (see `indexes create`).
171+
- Results are ordered by relevance score (descending).
172+
- `--select` specifies which columns to return (comma-separated, defaults to all). The `score` column is automatically appended when `--select` is used.
173+
174+
## Indexes
175+
176+
```sh
177+
hotdata indexes list --connection-id <id> --schema <schema> --table <table> [--workspace-id <id>] [--format table|json|yaml]
178+
hotdata indexes create --connection-id <id> --schema <schema> --table <table> --name <name> --columns <cols> [--type sorted|bm25|vector] [--metric l2|cosine|dot] [--async]
179+
```
180+
181+
- `list` shows indexes on a table with name, type, columns, status, and creation date.
182+
- `create` creates an index. Use `--type bm25` for full-text search, `--type vector` for vector search (requires `--metric`).
183+
- `--async` submits index creation as a background job.
184+
142185
## Results
143186

144187
```sh

skills/hotdata-cli/SKILL.md

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
---
22
name: hotdata-cli
3-
description: Use this skill when the user wants to run hotdata CLI commands, query the Hotdata API, list workspaces, list connections, create connections, list tables, manage datasets, execute SQL queries, or interact with the hotdata service. Activate when the user says "run hotdata", "query hotdata", "list workspaces", "list connections", "create a connection", "list tables", "list datasets", "create a dataset", "upload a dataset", "execute a query", or asks you to use the hotdata CLI.
3+
description: Use this skill when the user wants to run hotdata CLI commands, query the Hotdata API, list workspaces, list connections, create connections, list tables, manage datasets, execute SQL queries, manage saved queries, search tables, manage indexes, or interact with the hotdata service. Activate when the user says "run hotdata", "query hotdata", "list workspaces", "list connections", "create a connection", "list tables", "list datasets", "create a dataset", "upload a dataset", "execute a query", "search a table", "list indexes", "create an index", "list saved queries", "run a saved query", or asks you to use the hotdata CLI.
44
version: 0.1.5
55
---
66

@@ -138,11 +138,13 @@ hotdata datasets <dataset_id> [--workspace-id <workspace_id>] [--format table|js
138138
hotdata datasets create --label "My Dataset" --file data.csv [--table-name my_dataset] [--workspace-id <workspace_id>]
139139
hotdata datasets create --label "My Dataset" --sql "SELECT * FROM ..." [--table-name my_dataset] [--workspace-id <workspace_id>]
140140
hotdata datasets create --label "My Dataset" --query-id <saved_query_id> [--table-name my_dataset] [--workspace-id <workspace_id>]
141+
hotdata datasets create --label "My Dataset" --url "https://example.com/data.parquet" [--table-name my_dataset] [--workspace-id <workspace_id>]
141142
```
142143
- `--file` uploads a local file. Omit to pipe data via stdin: `cat data.csv | hotdata datasets create --label "My Dataset"`
143144
- `--sql` creates a dataset from a SQL query result.
144145
- `--query-id` creates a dataset from a previously saved query.
145-
- `--file`, `--sql`, and `--query-id` are mutually exclusive.
146+
- `--url` imports data directly from a URL (supports csv, json, parquet).
147+
- `--file`, `--sql`, `--query-id`, and `--url` are mutually exclusive.
146148
- Format is auto-detected from file extension (`.csv`, `.json`, `.parquet`) or file content.
147149
- `--label` is optional when `--file` is provided — defaults to the filename without extension. Required for `--sql`, `--query-id`, and `--url`.
148150
- `--table-name` is optional — derived from the label if omitted.
@@ -176,6 +178,41 @@ hotdata results <result_id> [--workspace-id <workspace_id>] [--format table|json
176178
- Query results include a `result-id` in the footer (e.g. `[result-id: rslt...]`).
177179
- **Always use this command to retrieve past query results rather than re-running the same query.** Re-running queries wastes resources and may return different results.
178180

181+
### Saved Queries
182+
```
183+
hotdata queries list [--limit <int>] [--offset <int>] [--format table|json|yaml]
184+
hotdata queries <query_id> [--format table|json|yaml]
185+
hotdata queries create --name "My Query" --sql "SELECT ..." [--description "..."] [--tags "tag1,tag2"] [--format table|json|yaml]
186+
hotdata queries update <query_id> [--name "New Name"] [--sql "SELECT ..."] [--description "..."] [--tags "tag1,tag2"] [--category "..."] [--table-size "..."] [--format table|json|yaml]
187+
hotdata queries run <query_id> [--format table|json|csv]
188+
```
189+
- `list` shows saved queries with name, description, tags, and version.
190+
- View a query by ID to see its formatted and syntax-highlighted SQL.
191+
- `create` requires `--name` and `--sql`. Tags are comma-separated.
192+
- `update` accepts any combination of fields to change.
193+
- `run` executes a saved query and displays results like the `query` command.
194+
- **Use `queries run` instead of re-typing SQL when a saved query exists.**
195+
196+
### Search
197+
```
198+
hotdata search "<query>" --table <connection.schema.table> --column <column> [--select <columns>] [--limit <n>] [--format table|json|csv]
199+
```
200+
- Full-text search using BM25 across a table column.
201+
- Requires a BM25 index on the target column (see `indexes create`).
202+
- Results are ordered by relevance score (descending).
203+
- `--select` specifies which columns to return (comma-separated, defaults to all). The `score` column is automatically appended when `--select` is used.
204+
- Default limit is 10.
205+
206+
### Indexes
207+
```
208+
hotdata indexes list --connection-id <id> --schema <schema> --table <table> [--workspace-id <workspace_id>] [--format table|json|yaml]
209+
hotdata indexes create --connection-id <id> --schema <schema> --table <table> --name <name> --columns <cols> [--type sorted|bm25|vector] [--metric l2|cosine|dot] [--async]
210+
```
211+
- `list` shows indexes on a table with name, type, columns, status, and creation date.
212+
- `create` creates an index. Use `--type bm25` for full-text search, `--type vector` for vector search (requires `--metric`).
213+
- `--async` submits index creation as a background job. Use `hotdata jobs <job_id>` to check status.
214+
- **Before using `hotdata search`, create a BM25 index on the target column.**
215+
179216
### Jobs
180217
```
181218
hotdata jobs list [--workspace-id <workspace_id>] [--job-type <type>] [--status <status>] [--all] [--format table|json|yaml]

src/command.rs

Lines changed: 104 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,19 @@ pub enum Commands {
145145
format: String,
146146
},
147147

148+
/// Manage saved queries
149+
Queries {
150+
/// Query ID to show details
151+
id: Option<String>,
152+
153+
/// Output format (used with query ID)
154+
#[arg(long, default_value = "table", value_parser = ["table", "json", "yaml"])]
155+
format: String,
156+
157+
#[command(subcommand)]
158+
command: Option<QueriesCommands>,
159+
},
160+
148161
/// Generate shell completions
149162
Completions {
150163
/// Shell to generate completions for
@@ -306,12 +319,16 @@ pub enum DatasetsCommands {
306319
format: String,
307320

308321
/// SQL query to create the dataset from
309-
#[arg(long, conflicts_with_all = ["file", "upload_id", "query_id"])]
322+
#[arg(long, conflicts_with_all = ["file", "upload_id", "query_id", "url"])]
310323
sql: Option<String>,
311324

312325
/// Saved query ID to create the dataset from
313-
#[arg(long, conflicts_with_all = ["file", "upload_id", "sql"])]
326+
#[arg(long, conflicts_with_all = ["file", "upload_id", "sql", "url"])]
314327
query_id: Option<String>,
328+
329+
/// URL to import data from
330+
#[arg(long, conflicts_with_all = ["file", "upload_id", "sql", "query_id"])]
331+
url: Option<String>,
315332
},
316333
}
317334

@@ -503,6 +520,91 @@ pub enum ResultsCommands {
503520
},
504521
}
505522

523+
#[derive(Subcommand)]
524+
pub enum QueriesCommands {
525+
/// List saved queries
526+
List {
527+
/// Maximum number of results
528+
#[arg(long)]
529+
limit: Option<u32>,
530+
531+
/// Pagination offset
532+
#[arg(long)]
533+
offset: Option<u32>,
534+
535+
/// Output format
536+
#[arg(long, default_value = "table", value_parser = ["table", "json", "yaml"])]
537+
format: String,
538+
},
539+
540+
/// Create a new saved query
541+
Create {
542+
/// Query name
543+
#[arg(long)]
544+
name: String,
545+
546+
/// SQL query string
547+
#[arg(long)]
548+
sql: String,
549+
550+
/// Query description
551+
#[arg(long)]
552+
description: Option<String>,
553+
554+
/// Comma-separated tags
555+
#[arg(long)]
556+
tags: Option<String>,
557+
558+
/// Output format
559+
#[arg(long, default_value = "table", value_parser = ["table", "json", "yaml"])]
560+
format: String,
561+
},
562+
563+
/// Execute a saved query
564+
Run {
565+
/// Saved query ID
566+
id: String,
567+
568+
/// Output format
569+
#[arg(long, default_value = "table", value_parser = ["table", "json", "csv"])]
570+
format: String,
571+
},
572+
573+
/// Update a saved query
574+
Update {
575+
/// Saved query ID
576+
id: String,
577+
578+
/// New query name
579+
#[arg(long)]
580+
name: Option<String>,
581+
582+
/// New SQL query string
583+
#[arg(long)]
584+
sql: Option<String>,
585+
586+
/// New description
587+
#[arg(long)]
588+
description: Option<String>,
589+
590+
/// Comma-separated tags
591+
#[arg(long)]
592+
tags: Option<String>,
593+
594+
/// Override the auto-detected category (pass empty string to clear)
595+
#[arg(long)]
596+
category: Option<String>,
597+
598+
/// User annotation for table size (pass empty string to clear)
599+
#[arg(long)]
600+
table_size: Option<String>,
601+
602+
/// Output format
603+
#[arg(long, default_value = "table", value_parser = ["table", "json", "yaml"])]
604+
format: String,
605+
},
606+
}
607+
506608
#[derive(Subcommand)]
507609
pub enum TablesCommands {
508610
/// List all tables in a workspace

src/datasets.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,22 @@ pub fn create_from_upload(
388388
create_dataset(workspace_id, label, table_name, source, on_failure);
389389
}
390390

391+
pub fn create_from_url(
392+
workspace_id: &str,
393+
url: &str,
394+
label: Option<&str>,
395+
table_name: Option<&str>,
396+
) {
397+
let label = match label {
398+
Some(l) => l,
399+
None => {
400+
eprintln!("error: --label is required when using --url");
401+
std::process::exit(1);
402+
}
403+
};
404+
create_dataset(workspace_id, label, table_name, json!({ "Url": { "url": url } }), None);
405+
}
406+
391407
pub fn create_from_query(
392408
workspace_id: &str,
393409
sql: &str,

0 commit comments

Comments
 (0)