Skip to content

Commit 0b42651

Browse files
airborne12 and claude committed
[test](search) Add test cases for multi-field search Lucene mode and cross_fields behavior
- Add test data id=9 to verify cross_fields vs best_fields semantics
- Add Test 2b: multi_field_multi_term_and_lucene to test default_operator:and with mode:lucene
- Add Test 11b: multi_field_cross_fields_verify to explicitly verify cross_fields behavior
- Update expected output file with new test case results

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent a5a29ca commit 0b42651

2 files changed

Lines changed: 60 additions & 1 deletion

File tree

regression-test/data/search/test_search_multi_field.out

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,31 @@
33
1 machine learning basics
44
4 machine maintenance
55
8 cooking machine reviews
6+
9 machine guide
67

78
-- !multi_field_multi_term_and --
89
1 machine learning basics
10+
9 machine guide
11+
12+
-- !multi_field_multi_term_and_lucene --
13+
1 machine learning basics
14+
9 machine guide
915

1016
-- !multi_field_multi_term_or --
1117
1 machine learning basics
1218
4 machine maintenance
1319
5 learning guitar
1420
6 deep learning neural networks
1521
8 cooking machine reviews
22+
9 machine guide
1623

1724
-- !multi_field_explicit_and --
1825
1 machine learning basics
26+
9 machine guide
1927

2028
-- !multi_field_mixed --
2129
1 machine learning basics tech
30+
9 machine guide tech
2231

2332
-- !three_fields --
2433
1 machine learning basics
@@ -29,53 +38,68 @@
2938
1 machine learning basics
3039
5 learning guitar
3140
6 deep learning neural networks
41+
9 machine guide
3242

3343
-- !multi_field_not --
3444
1 machine learning basics
3545
4 machine maintenance
46+
9 machine guide
3647

3748
-- !multi_field_complex --
3849
1 machine learning basics
3950
3 AI in healthcare
4051
4 machine maintenance
4152
6 deep learning neural networks
53+
9 machine guide
4254

4355
-- !single_field_array --
4456
1 machine learning basics
4557
4 machine maintenance
4658
8 cooking machine reviews
59+
9 machine guide
4760

4861
-- !multi_field_lucene_and --
4962
1 machine learning basics
63+
9 machine guide
64+
65+
-- !multi_field_cross_fields_verify --
66+
1 machine learning basics introduction to AI and ML
67+
9 machine guide learning tips
5068

5169
-- !multi_field_lucene_or --
5270
1 machine learning basics
5371
2 cooking recipes
5472
4 machine maintenance
5573
8 cooking machine reviews
74+
9 machine guide
5675

5776
-- !multi_field_lucene_and_or --
5877
1 machine learning basics
5978
4 machine maintenance
6079
8 cooking machine reviews
80+
9 machine guide
6181

6282
-- !multi_field_lucene_min_should_1 --
6383
1 machine learning basics
6484
8 cooking machine reviews
85+
9 machine guide
6586

6687
-- !multi_field_lucene_and_not --
6788
1 machine learning basics
6889
8 cooking machine reviews
90+
9 machine guide
6991

7092
-- !compare_default_field --
7193
1 machine learning basics
7294
4 machine maintenance
7395
8 cooking machine reviews
96+
9 machine guide
7497

7598
-- !compare_fields_single --
7699
1 machine learning basics
77100
4 machine maintenance
78101
8 cooking machine reviews
102+
9 machine guide
79103

80104
-- !multi_field_exact --
81105

@@ -84,4 +108,5 @@
84108
2 cooking recipes
85109
4 machine maintenance
86110
8 cooking machine reviews
111+
9 machine guide
87112

regression-test/suites/search/test_search_multi_field.groovy

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ suite("test_search_multi_field") {
5454
"""
5555

5656
// Insert test data
57+
// Note: id=9 is specifically designed to test cross_fields vs best_fields behavior
58+
// - cross_fields: matches (title has 'machine', content has 'learning')
59+
// - best_fields: does NOT match (no single field has both terms)
5760
sql """INSERT INTO ${tableName} VALUES
5861
(1, 'machine learning basics', 'introduction to AI and ML', 'ml ai tutorial', 'tech'),
5962
(2, 'cooking recipes', 'how to make pasta', 'food cooking', 'lifestyle'),
@@ -62,7 +65,8 @@ suite("test_search_multi_field") {
6265
(5, 'learning guitar', 'music lessons for beginners', 'music learning', 'entertainment'),
6366
(6, 'deep learning neural networks', 'advanced AI concepts', 'ai ml deep', 'tech'),
6467
(7, 'car maintenance guide', 'vehicle repair tips', 'auto maintenance', 'automotive'),
65-
(8, 'cooking machine reviews', 'kitchen appliance ratings', 'cooking appliances', 'lifestyle')
68+
(8, 'cooking machine reviews', 'kitchen appliance ratings', 'cooking appliances', 'lifestyle'),
69+
(9, 'machine guide', 'learning tips', 'howto', 'tech')
6670
"""
6771

6872
// Wait for index building
@@ -86,6 +90,20 @@ suite("test_search_multi_field") {
8690
ORDER BY id
8791
"""
8892

93+
// ============ Test 2b: Multiple terms with AND in Lucene mode ============
94+
// Same as Test 2 but with mode:lucene - should return the same result
95+
// This tests that default_operator:and works correctly with Lucene mode
96+
// ES behavior comparison:
97+
// - ES best_fields (default): only id=1 (both terms must be in same field)
98+
// - ES cross_fields: id=1 and id=9 (terms can be across different fields)
99+
// - Doris uses cross_fields semantics
100+
qt_multi_field_multi_term_and_lucene """
101+
SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title
102+
FROM ${tableName}
103+
WHERE search('machine learning', '{"fields":["title","content"],"default_operator":"and","mode":"lucene"}')
104+
ORDER BY id
105+
"""
106+
89107
// ============ Test 3: Multiple terms with OR (default) ============
90108
qt_multi_field_multi_term_or """
91109
SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title
@@ -151,13 +169,29 @@ suite("test_search_multi_field") {
151169
"""
152170

153171
// ============ Test 11: Multi-field with Lucene mode - simple AND ============
172+
// This is equivalent to Test 2 but uses Lucene mode with an explicit AND operator
173+
// Expected: Same result as Test 2 - cross_fields semantics
174+
// - ES best_fields would return: id=1 only (both terms in same field)
175+
// - Doris cross_fields returns: id=1, id=9 (terms can be in different fields)
176+
// id=9: title='machine guide', content='learning tips' - matches cross_fields but not best_fields
154177
qt_multi_field_lucene_and """
155178
SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title
156179
FROM ${tableName}
157180
WHERE search('machine AND learning', '{"fields":["title","content"],"mode":"lucene","minimum_should_match":0}')
158181
ORDER BY id
159182
"""
160183

184+
// ============ Test 11b: Verify cross_fields behavior explicitly ============
185+
// This test verifies that our implementation uses cross_fields semantics (like ES type:cross_fields)
186+
// Query: "machine AND learning" across title and content, with no "mode" option (unlike Test 11)
187+
// id=9 has 'machine' in title and 'learning' in content - should match with cross_fields
188+
qt_multi_field_cross_fields_verify """
189+
SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title, content
190+
FROM ${tableName}
191+
WHERE search('machine AND learning', '{"fields":["title","content"]}')
192+
ORDER BY id
193+
"""
194+
161195
// ============ Test 12: Multi-field with Lucene mode - OR ============
162196
qt_multi_field_lucene_or """
163197
SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title

0 commit comments

Comments
 (0)