Merged
26 changes: 26 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
@@ -34,6 +34,7 @@ nix = { version = "0.29", features = ["fs"] }
flate2 = "1"
tar = "0.4"
semver = "1"
sqlformat = "0.5.0"

[package.metadata.release]
pre-release-hook = ["git", "cliff", "-o", "CHANGELOG.md", "--tag", "{{version}}" ]
45 changes: 44 additions & 1 deletion README.md
@@ -56,6 +56,9 @@ API key priority (lowest to highest): config file → `HOTDATA_API_KEY` env var
| `tables` | `list` | List tables and columns |
| `datasets` | `list`, `create` | Manage uploaded datasets |
| `query` | | Execute a SQL query |
| `queries` | `list`, `create`, `update`, `run` | Manage saved queries |
| `search` | | Full-text search across a table column |
| `indexes` | `list`, `create` | Manage indexes on a table |
| `results` | `list` | Retrieve stored query results |
| `jobs` | `list` | Manage background jobs |
| `skills` | `install`, `status` | Manage the hotdata-cli agent skill |
@@ -123,10 +126,12 @@ hotdata datasets list [--workspace-id <id>] [--limit <n>] [--offset <n>] [--format table|json|yaml]
hotdata datasets <dataset_id> [--workspace-id <id>] [--format table|json|yaml]
hotdata datasets create --file data.csv [--label "My Dataset"] [--table-name my_dataset]
hotdata datasets create --sql "SELECT ..." --label "My Dataset"
hotdata datasets create --url "https://example.com/data.parquet" --label "My Dataset"
```

- Datasets are queryable as `datasets.main.<table_name>`.
- `--file`, `--sql`, and `--query-id` are mutually exclusive.
- `--file`, `--sql`, `--query-id`, and `--url` are mutually exclusive.
- `--url` imports data directly from a URL (supports CSV, JSON, and Parquet).
- Format is auto-detected from file extension or content.
- Piped stdin is supported: `cat data.csv | hotdata datasets create --label "My Dataset"`
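
The source-selection rule above can be sketched as plain Rust validation. This is a hypothetical simplification for illustration only — the CLI itself enforces the rule through clap's `conflicts_with_all`, as the `src/command.rs` diff in this PR shows:

```rust
/// Hypothetical, simplified mirror of the dataset-source flags:
/// at most one of --file, --sql, --query-id, or --url may be set;
/// none set means data is read from piped stdin.
#[derive(Default)]
struct CreateOpts {
    file: Option<String>,
    sql: Option<String>,
    query_id: Option<String>,
    url: Option<String>,
}

fn validate_sources(o: &CreateOpts) -> Result<&'static str, &'static str> {
    let set = [
        ("file", o.file.is_some()),
        ("sql", o.sql.is_some()),
        ("query-id", o.query_id.is_some()),
        ("url", o.url.is_some()),
    ];
    match set.iter().filter(|(_, s)| *s).count() {
        0 => Ok("stdin"),                                  // no flag: read piped stdin
        1 => Ok(set.iter().find(|(_, s)| *s).unwrap().0),  // exactly one source
        _ => Err("--file, --sql, --query-id, and --url are mutually exclusive"),
    }
}
```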

@@ -139,6 +144,44 @@ hotdata query "<sql>" [--workspace-id <id>] [--connection <connection_id>] [--format table|json|csv]
- Default format is `table`, which prints results with row count and execution time.
- Use `--connection` to scope the query to a specific connection.

## Saved Queries

```sh
hotdata queries list [--limit <n>] [--offset <n>] [--format table|json|yaml]
hotdata queries <query_id> [--format table|json|yaml]
hotdata queries create --name "My Query" --sql "SELECT ..." [--description "..."] [--tags "tag1,tag2"]
hotdata queries update <query_id> [--name "New Name"] [--sql "SELECT ..."] [--description "..."] [--tags "tag1,tag2"]
hotdata queries run <query_id> [--format table|json|csv]
```

- `list` shows saved queries with name, description, tags, and version.
- View a query by ID to see its formatted and syntax-highlighted SQL.
- `create` requires `--name` and `--sql`. Tags are comma-separated.
- `update` accepts any combination of fields to change.
- `run` executes a saved query and displays results like the `query` command.
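
The comma-separated `--tags` form above can be sketched as follows — a hypothetical illustration of the parsing rule, not the CLI's actual code:

```rust
/// Hypothetical sketch of comma-separated --tags parsing:
/// split on commas, trim whitespace, drop empty entries.
fn parse_tags(tags: &str) -> Vec<String> {
    tags.split(',')
        .map(str::trim)
        .filter(|t| !t.is_empty())
        .map(String::from)
        .collect()
}
```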

## Search

```sh
hotdata search "<query>" --table <connection.schema.table> --column <column> [--select <columns>] [--limit <n>] [--format table|json|csv]
```

- Full-text search using BM25 across a table column.
- Requires a BM25 index on the target column (see `indexes create`).
- Results are ordered by relevance score (descending).
- `--select` specifies which columns to return (comma-separated, defaults to all). The `score` column is automatically appended when `--select` is used.
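
The `--select` behavior can be sketched like this — an assumed illustration of the documented rule (default to all columns, append `score` when a projection is given), not the CLI's real implementation:

```rust
/// Hypothetical sketch: build the projected column list for a search.
/// With no --select, all columns are returned; with --select, the
/// relevance `score` column is appended if not already requested.
fn select_columns(select: Option<&str>) -> Vec<String> {
    match select {
        None => vec!["*".to_string()], // default: all columns
        Some(s) => {
            let mut cols: Vec<String> =
                s.split(',').map(|c| c.trim().to_string()).collect();
            if !cols.iter().any(|c| c == "score") {
                cols.push("score".to_string());
            }
            cols
        }
    }
}
```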

## Indexes

```sh
hotdata indexes list --connection-id <id> --schema <schema> --table <table> [--workspace-id <id>] [--format table|json|yaml]
hotdata indexes create --connection-id <id> --schema <schema> --table <table> --name <name> --columns <cols> [--type sorted|bm25|vector] [--metric l2|cosine|dot] [--async]
```

- `list` shows indexes on a table with name, type, columns, status, and creation date.
- `create` builds an index. Use `--type bm25` for full-text search, or `--type vector` for vector search (requires `--metric`).
- `--async` submits index creation as a background job.

## Results

```sh
hotdata results list [--workspace-id <id>] [--limit <n>] [--offset <n>] [--format table|json|yaml]
hotdata results <result_id> [--workspace-id <id>] [--format table|json|yaml]
```
41 changes: 39 additions & 2 deletions skills/hotdata-cli/SKILL.md
@@ -1,6 +1,6 @@
---
name: hotdata-cli
description: Use this skill when the user wants to run hotdata CLI commands, query the Hotdata API, list workspaces, list connections, create connections, list tables, manage datasets, execute SQL queries, or interact with the hotdata service. Activate when the user says "run hotdata", "query hotdata", "list workspaces", "list connections", "create a connection", "list tables", "list datasets", "create a dataset", "upload a dataset", "execute a query", or asks you to use the hotdata CLI.
description: Use this skill when the user wants to run hotdata CLI commands, query the Hotdata API, list workspaces, list connections, create connections, list tables, manage datasets, execute SQL queries, manage saved queries, search tables, manage indexes, or interact with the hotdata service. Activate when the user says "run hotdata", "query hotdata", "list workspaces", "list connections", "create a connection", "list tables", "list datasets", "create a dataset", "upload a dataset", "execute a query", "search a table", "list indexes", "create an index", "list saved queries", "run a saved query", or asks you to use the hotdata CLI.
version: 0.1.5
---

@@ -138,11 +138,13 @@ hotdata datasets <dataset_id> [--workspace-id <workspace_id>] [--format table|json|yaml]
hotdata datasets create --label "My Dataset" --file data.csv [--table-name my_dataset] [--workspace-id <workspace_id>]
hotdata datasets create --label "My Dataset" --sql "SELECT * FROM ..." [--table-name my_dataset] [--workspace-id <workspace_id>]
hotdata datasets create --label "My Dataset" --query-id <saved_query_id> [--table-name my_dataset] [--workspace-id <workspace_id>]
hotdata datasets create --label "My Dataset" --url "https://example.com/data.parquet" [--table-name my_dataset] [--workspace-id <workspace_id>]
```
- `--file` uploads a local file. Omit to pipe data via stdin: `cat data.csv | hotdata datasets create --label "My Dataset"`
- `--sql` creates a dataset from a SQL query result.
- `--query-id` creates a dataset from a previously saved query.
- `--file`, `--sql`, and `--query-id` are mutually exclusive.
- `--url` imports data directly from a URL (supports CSV, JSON, and Parquet).
- `--file`, `--sql`, `--query-id`, and `--url` are mutually exclusive.
- Format is auto-detected from file extension (`.csv`, `.json`, `.parquet`) or file content.
- `--label` is optional when `--file` is provided — defaults to the filename without extension. Required for `--sql` and `--query-id`.
- `--table-name` is optional — derived from the label if omitted.
@@ -176,6 +178,41 @@ hotdata results <result_id> [--workspace-id <workspace_id>] [--format table|json|yaml]
- Query results include a `result-id` in the footer (e.g. `[result-id: rslt...]`).
- **Always use this command to retrieve past query results rather than re-running the same query.** Re-running queries wastes resources and may return different results.

### Saved Queries
```
hotdata queries list [--limit <int>] [--offset <int>] [--format table|json|yaml]
hotdata queries <query_id> [--format table|json|yaml]
hotdata queries create --name "My Query" --sql "SELECT ..." [--description "..."] [--tags "tag1,tag2"] [--format table|json|yaml]
hotdata queries update <query_id> [--name "New Name"] [--sql "SELECT ..."] [--description "..."] [--tags "tag1,tag2"] [--format table|json|yaml]
hotdata queries run <query_id> [--format table|json|csv]
```
- `list` shows saved queries with name, description, tags, and version.
- View a query by ID to see its formatted and syntax-highlighted SQL.
- `create` requires `--name` and `--sql`. Tags are comma-separated.
- `update` accepts any combination of fields to change.
- `run` executes a saved query and displays results like the `query` command.
- **Use `queries run` instead of re-typing SQL when a saved query exists.**

### Search
```
hotdata search "<query>" --table <connection.schema.table> --column <column> [--select <columns>] [--limit <n>] [--format table|json|csv]
```
- Full-text search using BM25 across a table column.
- Requires a BM25 index on the target column (see `indexes create`).
- Results are ordered by relevance score (descending).
- `--select` specifies which columns to return (comma-separated, defaults to all). The `score` column is automatically appended when `--select` is used.
- Default limit is 10.

### Indexes
```
hotdata indexes list --connection-id <id> --schema <schema> --table <table> [--workspace-id <workspace_id>] [--format table|json|yaml]
hotdata indexes create --connection-id <id> --schema <schema> --table <table> --name <name> --columns <cols> [--type sorted|bm25|vector] [--metric l2|cosine|dot] [--async]
```
- `list` shows indexes on a table with name, type, columns, status, and creation date.
- `create` builds an index. Use `--type bm25` for full-text search, or `--type vector` for vector search (requires `--metric`).
- `--async` submits index creation as a background job. Use `hotdata jobs <job_id>` to check status.
- **Before using `hotdata search`, create a BM25 index on the target column.**

### Jobs
```
hotdata jobs list [--workspace-id <workspace_id>] [--job-type <type>] [--status <status>] [--all] [--format table|json|yaml]
```
106 changes: 104 additions & 2 deletions src/command.rs
@@ -145,6 +145,19 @@ pub enum Commands {
format: String,
},

/// Manage saved queries
Queries {
/// Query ID to show details
id: Option<String>,

/// Output format (used with query ID)
#[arg(long, default_value = "table", value_parser = ["table", "json", "yaml"])]
format: String,

#[command(subcommand)]
command: Option<QueriesCommands>,
},

/// Generate shell completions
Completions {
/// Shell to generate completions for
@@ -306,12 +319,16 @@ pub enum DatasetsCommands {
format: String,

/// SQL query to create the dataset from
#[arg(long, conflicts_with_all = ["file", "upload_id", "query_id"])]
#[arg(long, conflicts_with_all = ["file", "upload_id", "query_id", "url"])]
sql: Option<String>,

/// Saved query ID to create the dataset from
#[arg(long, conflicts_with_all = ["file", "upload_id", "sql"])]
#[arg(long, conflicts_with_all = ["file", "upload_id", "sql", "url"])]
query_id: Option<String>,

/// URL to import data from
#[arg(long, conflicts_with_all = ["file", "upload_id", "sql", "query_id"])]
url: Option<String>,
},
}

@@ -503,6 +520,91 @@ pub enum ResultsCommands {
},
}

#[derive(Subcommand)]
pub enum QueriesCommands {
/// List saved queries
List {
/// Maximum number of results
#[arg(long)]
limit: Option<u32>,

/// Pagination offset
#[arg(long)]
offset: Option<u32>,

/// Output format
#[arg(long, default_value = "table", value_parser = ["table", "json", "yaml"])]
format: String,
},

/// Create a new saved query
Create {
/// Query name
#[arg(long)]
name: String,

/// SQL query string
#[arg(long)]
sql: String,

/// Query description
#[arg(long)]
description: Option<String>,

/// Comma-separated tags
#[arg(long)]
tags: Option<String>,

/// Output format
#[arg(long, default_value = "table", value_parser = ["table", "json", "yaml"])]
format: String,
},

/// Execute a saved query
Run {
/// Saved query ID
id: String,

/// Output format
#[arg(long, default_value = "table", value_parser = ["table", "json", "csv"])]
format: String,
},

/// Update a saved query
Update {
/// Saved query ID
id: String,

/// New query name
#[arg(long)]
name: Option<String>,

/// New SQL query string
#[arg(long)]
sql: Option<String>,

/// New description
#[arg(long)]
description: Option<String>,

/// Comma-separated tags
#[arg(long)]
tags: Option<String>,

/// Override the auto-detected category (pass empty string to clear)
#[arg(long)]
category: Option<String>,

/// User annotation for table size (pass empty string to clear)
#[arg(long)]
table_size: Option<String>,

/// Output format
#[arg(long, default_value = "table", value_parser = ["table", "json", "yaml"])]
format: String,
},
}

#[derive(Subcommand)]
pub enum TablesCommands {
/// List all tables in a workspace
16 changes: 16 additions & 0 deletions src/datasets.rs
@@ -388,6 +388,22 @@ pub fn create_from_upload(
create_dataset(workspace_id, label, table_name, source, on_failure);
}

pub fn create_from_url(
workspace_id: &str,
url: &str,
label: Option<&str>,
table_name: Option<&str>,
) {
let label = match label {
Some(l) => l,
None => {
eprintln!("error: --label is required when using --url");
std::process::exit(1);
}
};
create_dataset(workspace_id, label, table_name, json!({ "Url": { "url": url } }), None);
}

pub fn create_from_query(
workspace_id: &str,
sql: &str,