Skip to content

Commit 12ad08f

Browse files
committed
fix
1 parent 3cb667a commit 12ad08f

10 files changed

Lines changed: 28 additions & 112 deletions

File tree

paimon-diskann/PARAMETER_TUNING.md

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -108,14 +108,6 @@ DiskANN is a graph-based approximate nearest neighbor (ANN) search algorithm des
108108
- **2,000,000**: Default for balanced performance
109109
- **5,000,000+**: For large-scale production systems with ample resources
110110

111-
#### `vector.diskann.index-type`
112-
- **Default**: MEMORY
113-
- **Options**: MEMORY, DISK
114-
- **Description**: Type of index structure
115-
- **Recommendations**:
116-
- **MEMORY**: For datasets that fit in RAM (best performance)
117-
- **DISK**: For datasets exceeding RAM (requires SSD)
118-
119111
## Performance Tuning Guide
120112

121113
### High Recall (>95%)
@@ -139,14 +131,6 @@ vector.diskann.build-list-size = 75
139131
vector.diskann.search-list-size = 32
140132
```
141133

142-
### Memory-Constrained
143-
```properties
144-
vector.diskann.max-degree = 32
145-
vector.diskann.build-list-size = 75
146-
vector.size-per-index = 500000
147-
vector.diskann.index-type = DISK
148-
```
149-
150134
## Best Practices
151135

152136
1. **Start with defaults**: The default parameters are tuned for balanced performance

paimon-diskann/paimon-diskann-e2e-test/src/test/scala/org/apache/paimon/spark/sql/DiskAnnVectorIndexE2ETest.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ class DiskAnnVectorIndexE2ETest extends PaimonSparkTestBase {
8282
spark.sql(s"INSERT INTO T VALUES $values")
8383

8484
val output = spark
85-
.sql("CALL sys.create_global_index(table => 'test.T', index_column => 'v', index_type => 'diskann-vector-ann', options => 'vector.dim=3,vector.diskann.index-type=MEMORY')")
85+
.sql("CALL sys.create_global_index(table => 'test.T', index_column => 'v', index_type => 'diskann-vector-ann', options => 'vector.dim=3')")
8686
.collect()
8787
.head
8888
assert(output.getBoolean(0))

paimon-diskann/paimon-diskann-index/src/main/java/org/apache/paimon/diskann/index/DiskAnnIndex.java

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -45,14 +45,9 @@ private DiskAnnIndex(Index index, int dimension, int buildListSize) {
4545
}
4646

4747
public static DiskAnnIndex create(
48-
int dimension,
49-
DiskAnnVectorMetric metric,
50-
DiskAnnIndexType indexType,
51-
int maxDegree,
52-
int buildListSize) {
48+
int dimension, DiskAnnVectorMetric metric, int maxDegree, int buildListSize) {
5349
MetricType metricType = metric.toMetricType();
54-
Index index =
55-
Index.create(dimension, metricType, indexType.value(), maxDegree, buildListSize);
50+
Index index = Index.create(dimension, metricType, 0, maxDegree, buildListSize);
5651
return new DiskAnnIndex(index, dimension, buildListSize);
5752
}
5853

paimon-diskann/paimon-diskann-index/src/main/java/org/apache/paimon/diskann/index/DiskAnnIndexType.java

Lines changed: 0 additions & 34 deletions
This file was deleted.

paimon-diskann/paimon-diskann-index/src/main/java/org/apache/paimon/diskann/index/DiskAnnVectorGlobalIndexWriter.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ private void flushCurrentIndex() throws IOException {
278278
new DiskAnnIndexMeta(
279279
dim,
280280
options.metric().toMetricType().value(),
281-
options.indexType().value(),
281+
0,
282282
currentIndexCount,
283283
currentIndexMinId,
284284
currentIndexMaxId,
@@ -300,7 +300,6 @@ private DiskAnnIndex createIndex() {
300300
return DiskAnnIndex.create(
301301
options.dimension(),
302302
options.metric(),
303-
options.indexType(),
304303
options.maxDegree(),
305304
options.buildListSize());
306305
}

paimon-diskann/paimon-diskann-index/src/main/java/org/apache/paimon/diskann/index/DiskAnnVectorIndexOptions.java

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,6 @@ public class DiskAnnVectorIndexOptions {
3838
.withDescription(
3939
"The similarity metric for vector search (L2, INNER_PRODUCT, COSINE), and L2 is the default");
4040

41-
public static final ConfigOption<DiskAnnIndexType> VECTOR_INDEX_TYPE =
42-
ConfigOptions.key("vector.diskann.index-type")
43-
.enumType(DiskAnnIndexType.class)
44-
.defaultValue(DiskAnnIndexType.MEMORY)
45-
.withDescription("The DiskANN index type to use (MEMORY, DISK)");
46-
4741
public static final ConfigOption<Integer> VECTOR_MAX_DEGREE =
4842
ConfigOptions.key("vector.diskann.max-degree")
4943
.intType()
@@ -99,7 +93,6 @@ public class DiskAnnVectorIndexOptions {
9993

10094
private final int dimension;
10195
private final DiskAnnVectorMetric metric;
102-
private final DiskAnnIndexType indexType;
10396
private final int maxDegree;
10497
private final int buildListSize;
10598
private final int searchListSize;
@@ -112,7 +105,6 @@ public class DiskAnnVectorIndexOptions {
112105
public DiskAnnVectorIndexOptions(Options options) {
113106
this.dimension = options.get(VECTOR_DIM);
114107
this.metric = options.get(VECTOR_METRIC);
115-
this.indexType = options.get(VECTOR_INDEX_TYPE);
116108
this.maxDegree = options.get(VECTOR_MAX_DEGREE);
117109
this.buildListSize = options.get(VECTOR_BUILD_LIST_SIZE);
118110
this.searchListSize = options.get(VECTOR_SEARCH_LIST_SIZE);
@@ -137,10 +129,6 @@ public DiskAnnVectorMetric metric() {
137129
return metric;
138130
}
139131

140-
public DiskAnnIndexType indexType() {
141-
return indexType;
142-
}
143-
144132
public int maxDegree() {
145133
return maxDegree;
146134
}

paimon-diskann/paimon-diskann-index/src/main/java/org/apache/paimon/diskann/index/FileIOGraphReader.java

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@ public class FileIOGraphReader implements Closeable {
7070
// ---- Header fields ----
7171
private final int dimension;
7272
private final int metricValue;
73-
private final int indexTypeValue;
7473
private final int maxDegree;
7574
private final int buildListSize;
7675
private final int count;
@@ -113,7 +112,7 @@ public FileIOGraphReader(SeekableInputStream input, int cacheSize) throws IOExce
113112
off += 4;
114113
this.metricValue = readInt(headerBuf, off);
115114
off += 4;
116-
this.indexTypeValue = readInt(headerBuf, off);
115+
// skip the 4-byte indexType header field (no longer read)
117116
off += 4;
118117
this.maxDegree = readInt(headerBuf, off);
119118
off += 4;
@@ -172,10 +171,6 @@ public int getMetricValue() {
172171
return metricValue;
173172
}
174173

175-
public int getIndexTypeValue() {
176-
return indexTypeValue;
177-
}
178-
179174
public int getMaxDegree() {
180175
return maxDegree;
181176
}

paimon-diskann/paimon-diskann-index/src/test/java/org/apache/paimon/diskann/index/DiskAnnVectorGlobalIndexScanTest.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,6 @@ public void before() throws Exception {
105105
.option(CoreOptions.BUCKET.key(), "-1")
106106
.option("vector.dim", "2")
107107
.option("vector.metric", "L2")
108-
.option("vector.diskann.index-type", "MEMORY")
109108
.option("data-evolution.enabled", "true")
110109
.option("row-tracking.enabled", "true")
111110
.build();
@@ -160,7 +159,6 @@ public void testVectorIndexScanWithDifferentMetrics() throws Exception {
160159
.option(CoreOptions.BUCKET.key(), "-1")
161160
.option("vector.dim", "2")
162161
.option("vector.metric", "INNER_PRODUCT")
163-
.option("vector.diskann.index-type", "MEMORY")
164162
.option("data-evolution.enabled", "true")
165163
.option("row-tracking.enabled", "true")
166164
.build();

paimon-diskann/paimon-diskann-index/src/test/java/org/apache/paimon/diskann/index/DiskAnnVectorGlobalIndexTest.java

Lines changed: 23 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -153,43 +153,36 @@ public void testDifferentMetrics() throws IOException {
153153
}
154154

155155
@Test
156-
public void testDifferentIndexTypes() throws IOException {
156+
public void testDefaultOptions() throws IOException {
157157
int dimension = 32;
158158
int numVectors = 100;
159159

160-
String[] indexTypes = {"MEMORY"};
161-
162-
for (String indexType : indexTypes) {
163-
Options options = createDefaultOptions(dimension);
164-
options.setString("vector.diskann.index-type", indexType);
165-
DiskAnnVectorIndexOptions indexOptions = new DiskAnnVectorIndexOptions(options);
166-
Path typeIndexPath = new Path(indexPath, indexType.toLowerCase());
167-
GlobalIndexFileWriter fileWriter = createFileWriter(typeIndexPath);
168-
DiskAnnVectorGlobalIndexWriter writer =
169-
new DiskAnnVectorGlobalIndexWriter(fileWriter, vectorType, indexOptions);
160+
Options options = createDefaultOptions(dimension);
161+
DiskAnnVectorIndexOptions indexOptions = new DiskAnnVectorIndexOptions(options);
162+
GlobalIndexFileWriter fileWriter = createFileWriter(indexPath);
163+
DiskAnnVectorGlobalIndexWriter writer =
164+
new DiskAnnVectorGlobalIndexWriter(fileWriter, vectorType, indexOptions);
170165

171-
List<float[]> testVectors = generateRandomVectors(numVectors, dimension);
172-
testVectors.forEach(writer::write);
166+
List<float[]> testVectors = generateRandomVectors(numVectors, dimension);
167+
testVectors.forEach(writer::write);
173168

174-
List<ResultEntry> results = writer.finish();
175-
assertThat(results).hasSize(1);
169+
List<ResultEntry> results = writer.finish();
170+
assertThat(results).hasSize(1);
176171

177-
ResultEntry result = results.get(0);
178-
GlobalIndexFileReader fileReader = createFileReader(typeIndexPath);
179-
List<GlobalIndexIOMeta> metas = new ArrayList<>();
180-
metas.add(
181-
new GlobalIndexIOMeta(
182-
new Path(typeIndexPath, result.fileName()),
183-
fileIO.getFileSize(new Path(typeIndexPath, result.fileName())),
184-
result.meta()));
172+
ResultEntry result = results.get(0);
173+
GlobalIndexFileReader fileReader = createFileReader(indexPath);
174+
List<GlobalIndexIOMeta> metas = new ArrayList<>();
175+
metas.add(
176+
new GlobalIndexIOMeta(
177+
new Path(indexPath, result.fileName()),
178+
fileIO.getFileSize(new Path(indexPath, result.fileName())),
179+
result.meta()));
185180

186-
try (DiskAnnVectorGlobalIndexReader reader =
187-
new DiskAnnVectorGlobalIndexReader(
188-
fileReader, metas, vectorType, indexOptions)) {
189-
VectorSearch vectorSearch = new VectorSearch(testVectors.get(0), 5, fieldName);
190-
GlobalIndexResult searchResult = reader.visitVectorSearch(vectorSearch).get();
191-
assertThat(searchResult).isNotNull();
192-
}
181+
try (DiskAnnVectorGlobalIndexReader reader =
182+
new DiskAnnVectorGlobalIndexReader(fileReader, metas, vectorType, indexOptions)) {
183+
VectorSearch vectorSearch = new VectorSearch(testVectors.get(0), 5, fieldName);
184+
GlobalIndexResult searchResult = reader.visitVectorSearch(vectorSearch).get();
185+
assertThat(searchResult).isNotNull();
193186
}
194187
}
195188

@@ -448,7 +441,6 @@ private Options createDefaultOptions(int dimension) {
448441
Options options = new Options();
449442
options.setInteger("vector.dim", dimension);
450443
options.setString("vector.metric", "L2");
451-
options.setString("vector.diskann.index-type", "MEMORY");
452444
options.setInteger("vector.diskann.max-degree", 64);
453445
options.setInteger("vector.diskann.build-list-size", 100);
454446
options.setInteger("vector.diskann.search-list-size", 100);

paimon-diskann/paimon-diskann-jni/src/main/native/src/lib.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1055,7 +1055,6 @@ pub extern "system" fn Java_org_apache_paimon_diskann_DiskAnnNative_indexCreateS
10551055

10561056
let dimension = call_int!("getDimension");
10571057
let metric_type = call_int!("getMetricValue");
1058-
let _index_type = call_int!("getIndexTypeValue");
10591058
let max_degree = call_int!("getMaxDegree") as usize;
10601059
let build_ls = call_int!("getBuildListSize") as usize;
10611060
let count = call_int!("getCount") as usize;

0 commit comments

Comments
 (0)