Skip to content

Commit 488d879

Browse files
committed
Fix default sort again
1 parent 52240b1 commit 488d879

3 files changed

Lines changed: 205 additions & 15 deletions

File tree

quickwit/quickwit-search/src/collector.rs

Lines changed: 61 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -275,12 +275,12 @@ impl SortingFieldExtractorComponent {
275275
internal_repr: InternalValueRepr<V>,
276276
order: SortOrder,
277277
) -> tantivy::Result<Option<SortByValue>> {
278-
let Some((col_idx, val_as_u64)) = internal_repr.decode(order) else {
279-
return Ok(Some(SortByValue { sort_value: None }));
280-
};
281278
if V::is_elided() {
282279
return Ok(None);
283280
}
281+
let Some((col_idx, val_as_u64)) = internal_repr.decode(order) else {
282+
return Ok(Some(SortByValue { sort_value: None }));
283+
};
284284
let sort_value = match self {
285285
SortingFieldExtractorComponent::FastField(FastFieldExtractor {
286286
sort_columns, ..
@@ -2198,6 +2198,64 @@ mod tests {
21982198
}
21992199
}
22002200

2201+
#[test]
2202+
fn test_single_split_default_sort() {
2203+
let dataset: Vec<serde_json::Value> = vec![
2204+
serde_json::json!({"sort_u64_1": 15}), // doc 0
2205+
serde_json::json!({"sort_u64_1": 13}), // doc 1
2206+
serde_json::json!({"sort_u64_1": 10}), // doc 2
2207+
serde_json::json!({"sort_u64_1": 12}), // doc 3
2208+
serde_json::json!({"sort_u64_1": 9}), // doc 4
2209+
];
2210+
2211+
let index = make_index(&dataset);
2212+
let reader = index.reader().unwrap();
2213+
let searcher = reader.searcher();
2214+
2215+
let request = SearchRequest {
2216+
max_hits: 3,
2217+
sort_fields: vec![],
2218+
search_after: None,
2219+
..SearchRequest::default()
2220+
};
2221+
let collector = super::make_collector_for_split(
2222+
"fake_split_id".to_string(),
2223+
&request,
2224+
Default::default(),
2225+
)
2226+
.unwrap();
2227+
let res = searcher
2228+
.search(&tantivy::query::AllQuery, &collector)
2229+
.unwrap();
2230+
// assert the exact hits where in other tests we mostly focus on the order
2231+
assert_eq!(
2232+
res.partial_hits,
2233+
vec![
2234+
PartialHit {
2235+
split_id: "fake_split_id".to_string(),
2236+
segment_ord: 0,
2237+
doc_id: 4,
2238+
sort_value: None,
2239+
sort_value2: None,
2240+
},
2241+
PartialHit {
2242+
split_id: "fake_split_id".to_string(),
2243+
segment_ord: 0,
2244+
doc_id: 3,
2245+
sort_value: None,
2246+
sort_value2: None,
2247+
},
2248+
PartialHit {
2249+
split_id: "fake_split_id".to_string(),
2250+
segment_ord: 0,
2251+
doc_id: 2,
2252+
sort_value: None,
2253+
sort_value2: None,
2254+
},
2255+
]
2256+
);
2257+
}
2258+
22012259
/// Merge intermediate results, asserting that both the regular and
22022260
/// incremental merge produce the same output.
22032261
fn merge_on_both_collectors(

quickwit/quickwit-search/src/root.rs

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -444,20 +444,10 @@ fn validate_sort_by_fields_and_search_after(
444444
}
445445

446446
let mut search_after_sort_value_count = 0;
447-
// TODO: we could validate if the search after sort value types of consistent with the sort
448-
// field types.
449-
if let Some(sort_by_value) = search_after_partial_hit.sort_value.as_ref() {
450-
sort_by_value
451-
.sort_value
452-
.as_ref()
453-
.context("sort value must be set")?;
447+
if search_after_partial_hit.sort_value.is_some() {
454448
search_after_sort_value_count += 1;
455449
}
456-
if let Some(sort_by_value_2) = search_after_partial_hit.sort_value2.as_ref() {
457-
sort_by_value_2
458-
.sort_value
459-
.as_ref()
460-
.context("sort value must be set")?;
450+
if search_after_partial_hit.sort_value2.is_some() {
461451
search_after_sort_value_count += 1;
462452
}
463453
if search_after_sort_value_count != sort_fields_without_doc_count {

quickwit/quickwit-search/src/tests.rs

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
use std::cmp::Ordering;
1616
use std::collections::{BTreeMap, BTreeSet};
17+
use std::vec;
1718

1819
use assert_json_diff::{assert_json_eq, assert_json_include};
1920
use quickwit_config::SearcherConfig;
@@ -2381,3 +2382,144 @@ async fn test_sort_by_dynamic_with_datetime_page_fails() -> anyhow::Result<()> {
23812382
test_sandbox.assert_quit().await;
23822383
Ok(())
23832384
}
2385+
2386+
#[tokio::test]
2387+
async fn test_sort_by_two_fields_with_null() -> anyhow::Result<()> {
2388+
let index_id = "sort-datetime-millis-search-after";
2389+
let doc_mapping_yaml = r#"
2390+
field_mappings:
2391+
- name: ts
2392+
type: datetime
2393+
fast: true
2394+
- name: body
2395+
type: text
2396+
fast: true
2397+
timestamp_field: ts
2398+
"#;
2399+
let test_sandbox = TestSandbox::create(index_id, doc_mapping_yaml, "{}", &["body"]).await?;
2400+
2401+
// timestamps with 10 digits should be interpreted as secs
2402+
let docs: Vec<_> = vec![
2403+
json!({"ts": 1_000_000_001i64, "body": format!("doc 9")}),
2404+
json!({"ts": 1_000_000_002i64, "body": format!("doc 8")}),
2405+
json!({"ts": 1_000_000_003i64, "body": format!("doc 7")}),
2406+
json!({"ts": 1_000_000_004i64}),
2407+
json!({"ts": 1_000_000_005i64}),
2408+
json!({"ts": 1_000_000_006i64}),
2409+
];
2410+
test_sandbox.add_documents(docs).await?;
2411+
2412+
let sort_fields = vec![
2413+
SortField {
2414+
field_name: "body".to_string(),
2415+
sort_order: SortOrder::Asc as i32,
2416+
..Default::default()
2417+
},
2418+
SortField {
2419+
field_name: "ts".to_string(),
2420+
sort_order: SortOrder::Asc as i32,
2421+
sort_datetime_format: Some(SortDatetimeFormat::UnixTimestampMillis as i32),
2422+
},
2423+
];
2424+
2425+
let page1 = single_node_search(
2426+
SearchRequest {
2427+
index_id_patterns: vec![index_id.to_string()],
2428+
query_ast: qast_json_helper("*", &["body"]),
2429+
max_hits: 5,
2430+
sort_fields: sort_fields.clone(),
2431+
..Default::default()
2432+
},
2433+
test_sandbox.metastore(),
2434+
test_sandbox.storage_resolver(),
2435+
)
2436+
.await?;
2437+
2438+
assert_eq!(page1.num_hits, 6);
2439+
assert_eq!(page1.hits.len(), 5);
2440+
let page_1_hits = page1
2441+
.hits
2442+
.iter()
2443+
.map(|hit| hit.partial_hit.clone().unwrap())
2444+
.collect::<Vec<_>>();
2445+
let split_id = page_1_hits[0].split_id.clone();
2446+
// for the timestamp field we convert to sort_datetime_format repr as I64
2447+
assert_eq!(
2448+
page_1_hits,
2449+
vec![
2450+
PartialHit {
2451+
sort_value: Some(SortValue::Str("doc 7".to_string()).into()),
2452+
sort_value2: Some(SortValue::I64(1_000_000_003_000).into()),
2453+
split_id: split_id.clone(),
2454+
segment_ord: 0,
2455+
doc_id: 2,
2456+
},
2457+
PartialHit {
2458+
sort_value: Some(SortValue::Str("doc 8".to_string()).into()),
2459+
sort_value2: Some(SortValue::I64(1_000_000_002_000).into()),
2460+
split_id: split_id.clone(),
2461+
segment_ord: 0,
2462+
doc_id: 1,
2463+
},
2464+
PartialHit {
2465+
sort_value: Some(SortValue::Str("doc 9".to_string()).into()),
2466+
sort_value2: Some(SortValue::I64(1_000_000_001_000).into()),
2467+
split_id: split_id.clone(),
2468+
segment_ord: 0,
2469+
doc_id: 0,
2470+
},
2471+
PartialHit {
2472+
sort_value: Some(SortByValue { sort_value: None }),
2473+
sort_value2: Some(SortValue::I64(1_000_000_004_000).into()),
2474+
split_id: split_id.clone(),
2475+
segment_ord: 0,
2476+
doc_id: 3,
2477+
},
2478+
PartialHit {
2479+
sort_value: Some(SortByValue { sort_value: None }),
2480+
sort_value2: Some(SortValue::I64(1_000_000_005_000).into()),
2481+
split_id: split_id.clone(),
2482+
segment_ord: 0,
2483+
doc_id: 4,
2484+
},
2485+
]
2486+
);
2487+
2488+
let page2 = single_node_search(
2489+
SearchRequest {
2490+
index_id_patterns: vec![index_id.to_string()],
2491+
query_ast: qast_json_helper("*", &["body"]),
2492+
max_hits: 5,
2493+
sort_fields: sort_fields.clone(),
2494+
search_after: Some(page_1_hits[4].clone()),
2495+
..Default::default()
2496+
},
2497+
test_sandbox.metastore(),
2498+
test_sandbox.storage_resolver(),
2499+
)
2500+
.await
2501+
.unwrap();
2502+
2503+
assert_eq!(page2.num_hits, 6);
2504+
assert_eq!(page2.hits.len(), 1);
2505+
let page_2_hits = page2
2506+
.hits
2507+
.iter()
2508+
.map(|hit| hit.partial_hit.clone().unwrap())
2509+
.collect::<Vec<_>>();
2510+
let split_id = page_2_hits[0].split_id.clone();
2511+
// for the timestamp field we convert to sort_datetime_format repr as I64
2512+
assert_eq!(
2513+
page_2_hits,
2514+
vec![PartialHit {
2515+
sort_value: Some(SortByValue { sort_value: None }),
2516+
sort_value2: Some(SortValue::I64(1_000_000_006_000).into()),
2517+
split_id: split_id.clone(),
2518+
segment_ord: 0,
2519+
doc_id: 5,
2520+
},]
2521+
);
2522+
2523+
test_sandbox.assert_quit().await;
2524+
Ok(())
2525+
}

0 commit comments

Comments
 (0)