-
Notifications
You must be signed in to change notification settings - Fork 26
Expand file tree
/
Copy pathvec_sim.h
More file actions
277 lines (247 loc) · 11.5 KB
/
vec_sim.h
File metadata and controls
277 lines (247 loc) · 11.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
/*
* Copyright (c) 2006-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*/
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#include <stdlib.h>
#include "query_results.h"
#include "vec_sim_common.h"
#include "info_iterator.h"
typedef struct VecSimIndexInterface VecSimIndex;
/**
* @brief Create a new VecSim index based on the given params.
* @param params index configurations (initial size, data type, dimension, metric, algorithm and the
* algorithm-related params).
* @return A pointer to the created index.
*/
VecSimIndex *VecSimIndex_New(const VecSimParams *params);
/**
* @brief Estimates the size of an empty index according to the parameters.
* @param params index configurations (initial size, data type, dimension, metric, algorithm and the
* algorithm-related params).
* @return Estimated index size.
*/
size_t VecSimIndex_EstimateInitialSize(const VecSimParams *params);
/**
* @brief Estimates the size of a single vector and its metadata according to the parameters, WHEN
* THE INDEX IS RESIZING BY A BLOCK. That is, this function estimates the allocation size of a new
* block upon resizing all the internal data structures, and returns the size of a single vector in
* that block. This value can be used later to decide what is the best block size, when the memory
* limit for a block is known:
* "memory limit for a block" / "size of a single vector in a block" = "block size".
* @param params index configurations (initial size, data type, dimension, metric, algorithm and the
* algorithm-related params).
* @return The estimated single vector memory consumption, considering the parameters.
*/
size_t VecSimIndex_EstimateElementSize(const VecSimParams *params);
/**
* @brief Release an index and its internal data.
* @param index the index to release.
*/
void VecSimIndex_Free(VecSimIndex *index);
/**
* @brief Add a vector to an index.
* @param index the index to which the vector is added.
* @param blob binary representation of the vector. Blob size should match the index data type and
* dimension.
* @param label the label of the added vector
* @return the number of new vectors inserted (1 for new insertion, 0 for override).
*/
int VecSimIndex_AddVector(VecSimIndex *index, const void *blob, size_t label);
/**
* @brief Remove a vector from an index.
* @param index the index from which the vector is removed.
* @param label the label of the removed vector
* @return the number of vectors removed (0 if the label was not found)
*/
int VecSimIndex_DeleteVector(VecSimIndex *index, size_t label);
/**
* @brief Calculate the distance between a vector stored in the index and a given vector. This
* function assumes that the given vector fits the index - its type and dimension are the same as
* the index's, and if the index's distance metric is cosine, the vector is already normalized.
* IMPORTANT: for tiered index, this should be called while *locks are locked for shared ownership*,
* as we avoid acquiring the locks internally. That is since this is usually called for every vector
* individually, and the overhead of acquiring and releasing the locks is significant in that case.
* @param index the index in which the first vector is located, and that defines the distance
* metric.
* @param label the label of the vector in the index.
* @param blob binary representation of the second vector. Blob size should match the index data
* type and dimension, and pre-normalized if needed.
* @return The distance (according to the index's distance metric) between `blob` and the vector
* with label `label`.
*/
double VecSimIndex_GetDistanceFrom_Unsafe(VecSimIndex *index, size_t label, const void *blob);
/**
* @brief normalize the vector blob in place.
* @param blob binary representation of a vector. Blob size should match the specified type and
* dimension.
* @param dim vector dimension.
* @param type vector type.
*/
void VecSim_Normalize(void *blob, size_t dim, VecSimType type);
/**
* @brief Returns the required blob size for a query vector that will be normalized.
*
* For INT8/UINT8 vectors with Cosine metric, VecSim_Normalize appends the norm (a float)
* at the end of the blob, so the required size is larger than just dim * sizeof(type).
*
* @param type vector element type.
* @param dim vector dimension.
* @param metric distance metric.
* @return required blob size in bytes.
*/
size_t VecSimParams_GetQueryBlobSize(VecSimType type, size_t dim, VecSimMetric metric);
/**
* @brief Return the number of vectors in the index.
* @param index the index whose size is requested.
* @return index size.
*/
size_t VecSimIndex_IndexSize(VecSimIndex *index);
/**
* @brief Resolves a VecSimRawParam array and generates a VecSimQueryParams struct.
* @param index the index for which the query params are resolved.
* @param rparams array of raw params to resolve.
* @param paramNum number of params in rparams (or number of params in rparams to resolve).
* @param qparams pointer to VecSimQueryParams struct to set.
* @param query_type indicates if query is hybrid, range or "standard" VSS query.
* @return VecSim_OK if the resolve was successful, VecSimResolveCode error code if not.
*/
VecSimResolveCode VecSimIndex_ResolveParams(VecSimIndex *index, VecSimRawParam *rparams,
int paramNum, VecSimQueryParams *qparams,
VecsimQueryType query_type);
/**
* @brief Search for the k closest vectors to a given vector in the index. The results can be
* ordered by their score or id.
* @param index the index to query in.
* @param queryBlob binary representation of the query vector. Blob size should match the index data
* type and dimension.
* @param k the number of "nearest neighbours" to return (upper bound).
* @param queryParams run time params for the search, which are algorithm-specific.
* @param order the criterion to sort the results list by. Options are by score, or by id.
* @return An opaque object that represents a list of results. User can access the id and score
* (which is the distance according to the index metric) of every result through
* VecSimQueryReply_Iterator.
*/
VecSimQueryReply *VecSimIndex_TopKQuery(VecSimIndex *index, const void *queryBlob, size_t k,
VecSimQueryParams *queryParams, VecSimQueryReply_Order order);
/**
* @brief Search for the vectors that are in a given range in the index with respect to a given
* vector. The results can be ordered by their score or id.
* @param index the index to query in.
* @param queryBlob binary representation of the query vector. Blob size should match the index data
* type and dimension.
* @param radius the radius around the query vector within which to search for vectors.
* @param queryParams run time params for the search, which are algorithm-specific.
* @param order the criterion to sort the results list by. Options are by score, or by id.
* @return An opaque object that represents a list of results. User can access the id and score
* (which is the distance according to the index metric) of every result through
* VecSimQueryReply_Iterator.
*/
VecSimQueryReply *VecSimIndex_RangeQuery(VecSimIndex *index, const void *queryBlob, double radius,
VecSimQueryParams *queryParams, VecSimQueryReply_Order order);
/**
* @brief Return index information.
* @param index the index to return its info.
* @return Index general and specific meta-data.
*/
VecSimIndexDebugInfo VecSimIndex_DebugInfo(VecSimIndex *index);
/**
* @brief Return basic immutable index information.
* @param index the index to return its info.
* @return Index basic meta-data.
*/
VecSimIndexBasicInfo VecSimIndex_BasicInfo(VecSimIndex *index);
/**
* @brief Return statistics information.
* @param index the index to return its info.
* @return Index statistic data.
*/
VecSimIndexStatsInfo VecSimIndex_StatsInfo(VecSimIndex *index);
/**
* @brief Returns an info iterator for generic reply purposes.
*
* @param index the index whose info is returned.
* @return VecSimDebugInfoIterator* An iterable containing the index general and specific meta-data.
*/
VecSimDebugInfoIterator *VecSimIndex_DebugInfoIterator(VecSimIndex *index);
/**
* @brief Create a new batch iterator for a specific index, for a specific query vector,
* using the Index_BatchIteratorNew method of the index. Should be released with
* VecSimBatchIterator_Free call.
* @param index the index in which the search will be done (in batches)
* @param queryBlob binary representation of the vector. Blob size should match the index data type
* and dimension.
* @param queryParams run time params for the search, which are algorithm-specific.
* @return Fresh batch iterator
*/
VecSimBatchIterator *VecSimBatchIterator_New(VecSimIndex *index, const void *queryBlob,
VecSimQueryParams *queryParams);
/**
* @brief Run async garbage collection for tiered async index.
* @param index the tiered index on which garbage collection is run.
*/
void VecSimTieredIndex_GC(VecSimIndex *index);
/**
* @brief Return true if the heuristics say that it is better to use ad-hoc brute-force
* search over the index instead of using batch iterator.
*
* @param index the index over which the search is to be performed.
*
* @param subsetSize the estimated number of vectors in the index that pass the filter
* (that is, query results can be only from a subset of vectors of this size).
*
* @param k the number of required results to return from the query.
*
* @param initial_check flag to indicate if this check is performed for the first time (upon
* creating the hybrid iterator), or after running batches.
*
* @return true if ad-hoc brute-force search is preferred, false if the batch iterator is preferred.
*/
bool VecSimIndex_PreferAdHocSearch(VecSimIndex *index, size_t subsetSize, size_t k,
bool initial_check);
/**
* @brief Acquire/Release the required locks of the tiered index externally before/after executing
* an unsafe *READ* operation (as the locks are acquired for shared ownership).
* @param index the tiered index to protect (does nothing for non-tiered indexes).
*/
void VecSimTieredIndex_AcquireSharedLocks(VecSimIndex *index);
void VecSimTieredIndex_ReleaseSharedLocks(VecSimIndex *index);
/**
* @brief Allow 3rd party memory functions to be used for memory management.
*
* @param memoryfunctions VecSimMemoryFunctions struct.
*/
void VecSim_SetMemoryFunctions(VecSimMemoryFunctions memoryfunctions);
/**
* @brief Allow 3rd party timeout callback to be used for limiting runtime of a query.
*
* @param callback timeoutCallbackFunction function. should get void* and return int.
*/
void VecSim_SetTimeoutCallbackFunction(timeoutCallbackFunction callback);
/**
* @brief Allow 3rd party log callback to be used for logging.
*
* @param callback logCallbackFunction function. should get void* and return void.
*/
void VecSim_SetLogCallbackFunction(logCallbackFunction callback);
/**
* @brief Set the context for logging (e.g., test name or file name).
*
* @param test_name the name of the test.
* @param test_type the type of the test (e.g., "unit" or "flow").
*/
void VecSim_SetTestLogContext(const char *test_name, const char *test_type);
/**
* @brief Allow 3rd party to set the write mode for tiered index - async insert/delete using
* background jobs, or insert/delete inplace.
* @note In tiered index scenario, should be called from main thread only !! (that is, the thread
* that is calling add/delete vector functions).
*
* @param mode VecSimWriteMode the mode in which we add/remove vectors (async or in-place).
*/
void VecSim_SetWriteMode(VecSimWriteMode mode);
#ifdef __cplusplus
}
#endif