Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 142 additions & 27 deletions src/indexes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,69 @@ fn connection_lookup(api: &Api) -> HashMap<String, String> {
connection_label_to_id_map(&refs)
}

/// Pick the connection id to address a per-table index call with during a
/// connection-wide scan.
///
/// Prefers the caller-supplied `--connection-id`: it always resolves, including
/// for a database-scoped connection whose `information_schema` `label`
/// (`__db_*`) is absent from `connections list` (that listing hides
/// database-scoped connections, so `name_to_id` can't map it — #161). The scan's
/// tables are already filtered to that connection, so the supplied id is correct
/// for every row. With no `--connection-id` (the list-everything case), maps the
/// label back to an id, falling back to the label itself.
fn scan_connection_id<'a>(
supplied: Option<&'a str>,
label: &'a str,
name_to_id: &'a HashMap<String, String>,
) -> &'a str {
supplied
.or_else(|| name_to_id.get(label).map(String::as_str))
.unwrap_or(label)
}

/// Gather index rows across a connection's (or the workspace's) tables — the
/// `indexes list` path when no full `connection.schema.table` triple is given.
///
/// Enumerates tables via `information_schema`, then fetches each table's indexes
/// by the connection id chosen in [`scan_connection_id`] (the supplied
/// `--connection-id` wins, fixing the database-scoped case in #161). Skipped
/// connections / missing tables surface as no rows for that table, not an error.
fn collect_connection_wide(
api: &Api,
connection_id: Option<&str>,
schema: Option<&str>,
table: Option<&str>,
) -> Vec<IndexRow> {
let tables = collect_tables(api, connection_id, schema, table);
// See `scan_connection_id`: a supplied `--connection-id` is used directly,
// so the name→id map is only needed (and only worth a round-trip) for the
// unscoped, list-everything case (#161).
let conn_ids = if connection_id.is_some() {
HashMap::new()
} else {
connection_lookup(api)
};
let per_table: Vec<(String, Vec<Index>)> = tables
.par_iter()
.map(|t| {
let cid = scan_connection_id(connection_id, &t.connection, &conn_ids);
let full = format!("{}.{}.{}", t.connection, t.schema, t.table);
let indexes = list_one_table_scan(api, cid, &t.schema, &t.table);
(full, indexes)
})
.collect();
let mut rows: Vec<IndexRow> = Vec::new();
for (full, indexes) in per_table {
for i in indexes {
rows.push(IndexRow {
inner: i,
table: Some(full.clone()),
});
}
}
rows
}

/// How to continue after merging one `/information_schema` page.
fn information_schema_followup(
has_more: bool,
Expand Down Expand Up @@ -264,33 +327,10 @@ pub fn list(
.collect();
(rows, false)
}
_ => {
let tables = collect_tables(&api, connection_id, schema, table);
let conn_ids = connection_lookup(&api);
let api = api.clone();
let per_table: Vec<(String, Vec<Index>)> = tables
.par_iter()
.map(|t| {
let cid = conn_ids
.get(&t.connection)
.map(String::as_str)
.unwrap_or(t.connection.as_str());
let full = format!("{}.{}.{}", t.connection, t.schema, t.table);
let indexes = list_one_table_scan(&api, cid, &t.schema, &t.table);
(full, indexes)
})
.collect();
let mut rows: Vec<IndexRow> = Vec::new();
for (full, indexes) in per_table {
for i in indexes {
rows.push(IndexRow {
inner: i,
table: Some(full.clone()),
});
}
}
(rows, true)
}
_ => (
collect_connection_wide(&api, connection_id, schema, table),
true,
),
};

match format {
Expand Down Expand Up @@ -602,6 +642,81 @@ mod tests {
assert!(!m.contains_key("conn-id"));
}

#[test]
fn scan_connection_id_prefers_supplied_id_over_label_map() {
// #161: a managed database's catalog surfaces under an internal
// `__db_*` label that `connections list` hides, so the name→id map is
// empty for it. The supplied --connection-id must win regardless.
let empty = HashMap::new();
assert_eq!(
scan_connection_id(Some("conn-real"), "__db_jz50abc", &empty),
"conn-real"
);
// Even when the label *is* in the map, the supplied id takes precedence.
let mut m = HashMap::new();
m.insert("__db_jz50abc".to_string(), "conn-mapped".to_string());
assert_eq!(
scan_connection_id(Some("conn-real"), "__db_jz50abc", &m),
"conn-real"
);
}

#[test]
fn scan_connection_id_maps_label_when_no_supplied_id() {
let mut m = HashMap::new();
m.insert("Warehouse".to_string(), "conn-id".to_string());
assert_eq!(scan_connection_id(None, "Warehouse", &m), "conn-id");
}

#[test]
fn scan_connection_id_falls_back_to_label_when_unmapped() {
let empty = HashMap::new();
assert_eq!(scan_connection_id(None, "Warehouse", &empty), "Warehouse");
}

#[test]
fn collect_connection_wide_uses_supplied_id_for_db_scoped_label() {
// #161 regression: information_schema reports a managed database's
// catalog under an internal `__db_*` label, but the per-table index
// call must use the supplied --connection-id. The indexes endpoint is
// mocked ONLY for the real id (`conn-real`); had the scan used the
// `__db_*` label (the old behavior), it would miss this mock. No
// `connections list` mock is needed — a supplied id skips that lookup.
let mut server = mockito::Server::new();
let info = server
.mock("GET", "/v1/information_schema")
.match_query(mockito::Matcher::Any)
.with_status(200)
.with_header("content-type", "application/json")
.with_body(
r#"{"count":1,"limit":100,"tables":[
{"connection":"__db_abc","schema":"public","table":"vec_mid","synced":true}
],"has_more":false,"next_cursor":null}"#,
)
.create();
let idx = server
.mock(
"GET",
"/v1/connections/conn-real/tables/public/vec_mid/indexes",
)
.with_status(200)
.with_header("content-type", "application/json")
.with_body(
r#"{"indexes":[{"index_name":"vec_mid_idx","index_type":"vector",
"columns":["c"],"metric":"cosine","status":"ready",
"created_at":"2020-01-01T00:00:00Z","updated_at":"2020-01-01T00:00:00Z"}]}"#,
)
.create();

let api = Api::test_new(&server.url(), "k", Some("ws"));
let rows = collect_connection_wide(&api, Some("conn-real"), None, None);
info.assert();
idx.assert();
assert_eq!(rows.len(), 1);
assert_eq!(rows[0].inner.index_name, "vec_mid_idx");
assert_eq!(rows[0].table.as_deref(), Some("__db_abc.public.vec_mid"));
}

#[test]
fn collect_tables_single_page() {
let mut server = mockito::Server::new();
Expand Down
Loading