From 128302fa411739ecfeb8038adbae08afd04b0c6a Mon Sep 17 00:00:00 2001
From: Paul Thurlow
Date: Fri, 27 Mar 2026 10:46:06 -0700
Subject: [PATCH] Add indexes and text search commands

---
 src/command.rs |  97 +++++++++++++++++++++++++++
 src/indexes.rs | 189 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/jobs.rs    |   4 +-
 src/main.rs    |  30 ++++++++-
 src/table.rs   |  65 +++++++++++++++--
 5 files changed, 375 insertions(+), 10 deletions(-)
 create mode 100644 src/indexes.rs

diff --git a/src/command.rs b/src/command.rs
index b1f8d78..a87f95d 100644
--- a/src/command.rs
+++ b/src/command.rs
@@ -105,6 +105,46 @@ pub enum Commands {
         command: Option,
     },
 
+    /// Manage indexes on a table
+    Indexes {
+        /// Workspace ID (defaults to first workspace from login)
+        #[arg(long, global = true)]
+        workspace_id: Option<String>,
+
+        #[command(subcommand)]
+        command: IndexesCommands,
+    },
+
+    /// Full-text search across a table column
+    Search {
+        /// Search query text
+        query: String,
+
+        /// Table to search (connection.schema.table)
+        #[arg(long)]
+        table: String,
+
+        /// Column to search
+        #[arg(long)]
+        column: String,
+
+        /// Columns to display (comma-separated, defaults to all)
+        #[arg(long)]
+        select: Option<String>,
+
+        /// Maximum number of results
+        #[arg(long, default_value = "10")]
+        limit: u32,
+
+        /// Workspace ID (defaults to first workspace from login)
+        #[arg(long)]
+        workspace_id: Option<String>,
+
+        /// Output format
+        #[arg(long, default_value = "table", value_parser = ["table", "json", "csv"])]
+        format: String,
+    },
+
     /// Generate shell completions
     Completions {
         /// Shell to generate completions for
@@ -139,6 +179,63 @@ pub enum AuthCommands {
     Status,
 }
 
+#[derive(Subcommand)]
+pub enum IndexesCommands {
+    /// List indexes on a table
+    List {
+        /// Connection ID
+        #[arg(long)]
+        connection_id: String,
+
+        /// Schema name
+        #[arg(long)]
+        schema: String,
+
+        /// Table name
+        #[arg(long)]
+        table: String,
+
+        /// Output format
+        #[arg(long, default_value = "table", value_parser = ["table", "json", "yaml"])]
+        format: String,
+    },
+
+    /// Create an index on a table
+    Create {
+        /// Connection ID
+        #[arg(long)]
+        connection_id: String,
+
+        /// Schema name
+        #[arg(long)]
+        schema: String,
+
+        /// Table name
+        #[arg(long)]
+        table: String,
+
+        /// Index name
+        #[arg(long)]
+        name: String,
+
+        /// Columns to index (comma-separated)
+        #[arg(long)]
+        columns: String,
+
+        /// Index type
+        #[arg(long, default_value = "sorted", value_parser = ["sorted", "bm25", "vector"])]
+        r#type: String,
+
+        /// Distance metric for vector indexes
+        #[arg(long, value_parser = ["l2", "cosine", "dot"])]
+        metric: Option<String>,
+
+        /// Create as a background job
+        #[arg(long)]
+        r#async: bool,
+    },
+}
+
 #[derive(Subcommand)]
 pub enum JobsCommands {
     /// List background jobs (shows active jobs by default)
diff --git a/src/indexes.rs b/src/indexes.rs
new file mode 100644
index 0000000..c3ad5a9
--- /dev/null
+++ b/src/indexes.rs
@@ -0,0 +1,189 @@
+use crate::config;
+use serde::{Deserialize, Serialize};
+
+/// A single index as returned by the indexes endpoint.
+#[derive(Deserialize, Serialize)]
+struct Index {
+    index_name: String,
+    index_type: String,
+    columns: Vec<String>,
+    metric: Option<String>,
+    status: String,
+    created_at: String,
+    updated_at: String,
+}
+
+/// Response body of the list-indexes request.
+#[derive(Deserialize)]
+struct ListResponse {
+    indexes: Vec<Index>,
+}
+
+/// List the indexes on a table and print them as "json", "yaml", or "table".
+///
+/// Sends a GET request to
+/// `{api_url}/connections/{connection_id}/tables/{schema}/{table}/indexes`
+/// with the API key from the "default" profile. Exits the process with
+/// status 1 on config, authentication, network, HTTP, or parse errors.
+/// Any other `format` value reaches `unreachable!()`.
+pub fn list(
+    workspace_id: &str,
+    connection_id: &str,
+    schema: &str,
+    table: &str,
+    format: &str,
+) {
+    let profile_config = match config::load("default") {
+        Ok(c) => c,
+        Err(e) => {
+            eprintln!("{e}");
+            std::process::exit(1);
+        }
+    };
+
+    let api_key = match &profile_config.api_key {
+        Some(key) if key != "PLACEHOLDER" => key.clone(),
+        _ => {
+            eprintln!("error: not authenticated. Run 'hotdata auth' to log in.");
+            std::process::exit(1);
+        }
+    };
+
+    let url = format!(
+        "{}/connections/{}/tables/{}/{}/indexes",
+        profile_config.api_url, connection_id, schema, table
+    );
+    let client = reqwest::blocking::Client::new();
+
+    let resp = match client
+        .get(&url)
+        .header("Authorization", format!("Bearer {api_key}"))
+        .header("X-Workspace-Id", workspace_id)
+        .send()
+    {
+        Ok(r) => r,
+        Err(e) => {
+            eprintln!("error connecting to API: {e}");
+            std::process::exit(1);
+        }
+    };
+
+    if !resp.status().is_success() {
+        use crossterm::style::Stylize;
+        eprintln!("{}", crate::util::api_error(resp.text().unwrap_or_default()).red());
+        std::process::exit(1);
+    }
+
+    let body: ListResponse = match resp.json() {
+        Ok(v) => v,
+        Err(e) => {
+            eprintln!("error parsing response: {e}");
+            std::process::exit(1);
+        }
+    };
+
+    match format {
+        "json" => println!("{}", serde_json::to_string_pretty(&body.indexes).unwrap()),
+        "yaml" => print!("{}", serde_yaml::to_string(&body.indexes).unwrap()),
+        "table" => {
+            if body.indexes.is_empty() {
+                use crossterm::style::Stylize;
+                eprintln!("{}", "No indexes found.".dark_grey());
+            } else {
+                let rows: Vec<Vec<String>> = body.indexes.iter().map(|i| vec![
+                    i.index_name.clone(),
+                    i.index_type.clone(),
+                    i.columns.join(", "),
+                    i.metric.clone().unwrap_or_default(),
+                    i.status.clone(),
+                    crate::util::format_date(&i.created_at),
+                ]).collect();
+                crate::table::print(&["NAME", "TYPE", "COLUMNS", "METRIC", "STATUS", "CREATED"], &rows);
+            }
+        }
+        _ => unreachable!(),
+    }
+}
+
+/// Create an index on a table via a POST to the table's indexes endpoint.
+///
+/// `columns` is a comma-separated list; entries are trimmed and empty
+/// entries are dropped. `metric` is sent only when provided. With
+/// `async_mode`, the `job_id` from the response is printed ("unknown" if
+/// it cannot be read). Exits the process with status 1 on config,
+/// authentication, network, or HTTP errors.
+pub fn create(
+    workspace_id: &str,
+    connection_id: &str,
+    schema: &str,
+    table: &str,
+    name: &str,
+    columns: &str,
+    index_type: &str,
+    metric: Option<&str>,
+    async_mode: bool,
+) {
+    let profile_config = match config::load("default") {
+        Ok(c) => c,
+        Err(e) => {
+            eprintln!("{e}");
+            std::process::exit(1);
+        }
+    };
+
+    let api_key = match &profile_config.api_key {
+        Some(key) if key != "PLACEHOLDER" => key.clone(),
+        _ => {
+            eprintln!("error: not authenticated. Run 'hotdata auth' to log in.");
+            std::process::exit(1);
+        }
+    };
+
+    let cols: Vec<&str> = columns.split(',').map(str::trim).filter(|c| !c.is_empty()).collect();
+    let mut body = serde_json::json!({
+        "index_name": name,
+        "columns": cols,
+        "index_type": index_type,
+        "async": async_mode,
+    });
+    if let Some(m) = metric {
+        body["metric"] = serde_json::json!(m);
+    }
+
+    let url = format!(
+        "{}/connections/{}/tables/{}/{}/indexes",
+        profile_config.api_url, connection_id, schema, table
+    );
+    let client = reqwest::blocking::Client::new();
+
+    let resp = match client
+        .post(&url)
+        .header("Authorization", format!("Bearer {api_key}"))
+        .header("X-Workspace-Id", workspace_id)
+        .json(&body)
+        .send()
+    {
+        Ok(r) => r,
+        Err(e) => {
+            eprintln!("error connecting to API: {e}");
+            std::process::exit(1);
+        }
+    };
+
+    if !resp.status().is_success() {
+        use crossterm::style::Stylize;
+        eprintln!("{}", crate::util::api_error(resp.text().unwrap_or_default()).red());
+        std::process::exit(1);
+    }
+
+    use crossterm::style::Stylize;
+    if async_mode {
+        let body: serde_json::Value = resp.json().unwrap_or_default();
+        let job_id = body["job_id"].as_str().unwrap_or("unknown");
+        println!("{}", "Index creation submitted.".green());
+        println!("job_id: {}", job_id);
+        println!("{}", "Use 'hotdata jobs <job_id>' to check status.".dark_grey());
+    } else {
+        println!("{}", "Index created.".green());
+    }
+}
diff --git a/src/jobs.rs b/src/jobs.rs
index bca4281..87c57ed 100644
--- a/src/jobs.rs
+++ b/src/jobs.rs
@@ -174,9 +174,7 @@ pub fn list(
 
     let jobs = if !all && status.is_none() {
         // Default: show only active jobs (pending + running)
-        let mut jobs = fetch_jobs(&client, &api_key, &api_url, workspace_id, job_type, Some("pending"), limit, offset);
-        jobs.extend(fetch_jobs(&client, &api_key, &api_url, workspace_id, job_type, Some("running"), limit, offset));
-        jobs
+        fetch_jobs(&client, &api_key, &api_url, workspace_id, job_type, Some("pending,running"), limit, offset)
     } else {
         fetch_jobs(&client, &api_key, &api_url, workspace_id, job_type, status, limit, offset)
     };
diff --git a/src/main.rs b/src/main.rs
index 94470ec..56ca4f5 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -4,6 +4,7 @@ mod config;
 mod connections;
 mod connections_new;
 mod datasets;
+mod indexes;
 mod jobs;
 mod query;
 mod results;
@@ -15,7 +16,7 @@ mod workspace;
 
 use anstyle::AnsiColor;
 use clap::{Parser, builder::Styles};
-use command::{AuthCommands, Commands, ConnectionsCommands, ConnectionsCreateCommands, DatasetsCommands, JobsCommands, ResultsCommands, SkillCommands, TablesCommands, WorkspaceCommands};
+use command::{AuthCommands, Commands, ConnectionsCommands, ConnectionsCreateCommands, DatasetsCommands, IndexesCommands, JobsCommands, ResultsCommands, SkillCommands, TablesCommands, WorkspaceCommands};
 
 #[derive(Parser)]
 #[command(name = "hotdata", version, about = concat!("Hotdata CLI - Command line interface for Hotdata (v", env!("CARGO_PKG_VERSION"), ")"), long_about = None, disable_version_flag = true)]
@@ -195,6 +196,33 @@ fn main() {
                 }
             }
         }
+        Commands::Indexes { workspace_id, command } => {
+            let workspace_id = resolve_workspace(workspace_id);
+            match command {
+                IndexesCommands::List { connection_id, schema, table, format } => {
+                    indexes::list(&workspace_id, &connection_id, &schema, &table, &format)
+                }
+                IndexesCommands::Create { connection_id, schema, table, name, columns, r#type, metric, r#async } => {
+                    indexes::create(&workspace_id, &connection_id, &schema, &table, &name, &columns, &r#type, metric.as_deref(), r#async)
+                }
+            }
+        }
+        Commands::Search { query, table, column, select, limit, workspace_id, format } => {
+            let workspace_id = resolve_workspace(workspace_id);
+            let columns = match select.as_deref() {
+                Some(cols) => format!("{}, score", cols),
+                None => "*".to_string(),
+            };
+            let sql = format!(
+                "SELECT {} FROM bm25_search('{}', '{}', '{}') ORDER BY score DESC LIMIT {}",
+                columns,
+                table.replace('\'', "''"),
+                column.replace('\'', "''"),
+                query.replace('\'', "''"),
+                limit,
+            );
+            query::execute(&sql, &workspace_id, None, &format)
+        }
         Commands::Completions { shell } => {
             use clap::CommandFactory;
             use clap_complete::generate;
diff --git a/src/table.rs b/src/table.rs
index 5596e1a..448f430 100644
--- a/src/table.rs
+++ b/src/table.rs
@@ -1,6 +1,6 @@
 use tabled::settings::{
     Color, Modify, Style,
-    object::{Rows, Segment},
+    object::{Columns, Rows, Segment},
     style::BorderColor,
     width::Width,
 };
@@ -11,32 +11,81 @@ fn term_width() -> usize {
         .unwrap_or(120)
 }
 
-fn style_table(table: &mut tabled::Table) {
+/// Find column indices whose header is "ID" or ends with "_ID" or " ID" (case-insensitive).
+fn id_column_indices(headers: &[impl AsRef<str>]) -> Vec<usize> {
+    headers
+        .iter()
+        .enumerate()
+        .filter(|(_, h)| {
+            let h = h.as_ref().to_ascii_uppercase();
+            h == "ID" || h.ends_with("_ID") || h.ends_with(" ID")
+        })
+        .map(|(i, _)| i)
+        .collect()
+}
+
+/// Get the character width of a column's cell in the first row (0 if the row or cell is missing).
+fn first_row_width(rows: &[Vec<String>], col: usize) -> usize {
+    rows.first()
+        .and_then(|r| r.get(col))
+        .map(|s| s.chars().count())
+        .unwrap_or(0)
+}
+
+/// Apply the shared table style, wrapping only the non-ID columns.
+///
+/// `id_widths` holds the content width of each column in `id_col_indices`;
+/// those columns are left unwrapped and the remaining terminal width is
+/// split evenly between the other columns.
+fn style_table(table: &mut tabled::Table, num_cols: usize, id_col_indices: &[usize], id_widths: &[usize]) {
     let tw = term_width();
 
+    // Calculate how much space ID columns need (content + 3 for cell padding/borders)
+    let id_total: usize = id_widths.iter().map(|w| w + 3).sum();
+    // Width model: every column takes its content width + 3 (padding + border),
+    // plus 1 for the final border character.
+    let non_id_count = num_cols - id_col_indices.len();
+    let overhead = 1; // final border character
+    let remaining = tw.saturating_sub(id_total + overhead + 3 * non_id_count);
+    let non_id_width = if non_id_count > 0 { (remaining / non_id_count).max(1) } else { 0 };
+
+    table.with(Style::modern_rounded());
+
+    // Wrap only non-ID columns to fit; leave ID columns at full width
+    for col in 0..num_cols {
+        if id_col_indices.contains(&col) {
+            continue;
+        }
+        table.with(Modify::new(Columns::new(col..=col)).with(Width::wrap(non_id_width).keep_words(true)));
+    }
+
     table
-        .with(Style::modern_rounded())
-        .with(Width::wrap(tw).keep_words(true))
         .with(Modify::new(Segment::all()).with(BorderColor::filled(Color::FG_BRIGHT_BLACK)))
         .with(Modify::new(Rows::first()).with(Color::FG_GREEN));
 }
 
 /// Print a table with string data. Headers are &str slices, rows are Vec<String>.
 pub fn print(headers: &[&str], rows: &[Vec<String>]) {
+    let id_cols = id_column_indices(headers);
+    let id_widths: Vec<usize> = id_cols.iter().map(|&i| first_row_width(rows, i).max(headers[i].chars().count())).collect();
+
     let mut builder = tabled::builder::Builder::new();
     builder.push_record(headers.iter().map(|h| h.to_string()));
     for row in rows {
         builder.push_record(row.iter().map(|c| c.to_string()));
     }
     let mut table = builder.build();
-    style_table(&mut table);
+    style_table(&mut table, headers.len(), &id_cols, &id_widths);
     println!("{table}");
 }
 
 /// Print a table with JSON-typed data. Numbers, bools, and nulls get per-cell coloring.
+/// Uses simple word-wrapping without ID column priority (for user-generated query results).
 pub fn print_json(headers: &[String], rows: &[Vec]) {
     use tabled::settings::object::Cell;
 
+    let tw = term_width();
+
     let mut builder = tabled::builder::Builder::new();
     builder.push_record(headers.iter().map(|h| h.to_string()));
 
@@ -69,7 +118,11 @@ pub fn print_json(headers: &[String], rows: &[Vec]) {
     }
 
     let mut table = builder.build();
-    style_table(&mut table);
+    table
+        .with(Style::modern_rounded())
+        .with(Width::wrap(tw).keep_words(true))
+        .with(Modify::new(Segment::all()).with(BorderColor::filled(Color::FG_BRIGHT_BLACK)))
+        .with(Modify::new(Rows::first()).with(Color::FG_GREEN));
 
     for (r, c, color) in colored_cells {
         table.with(Modify::new(Cell::new(r, c)).with(color));