@@ -56,6 +56,7 @@ template <concepts::VectorElement StorageT, typename MetricT> class HNSWIndex {
5656 size_t ef_construction = 200 ; // /< Exploration factor during construction (100-500)
5757 float ml_factor = 1 .0f / std::log(2 .0f ); // /< Layer selection multiplier (1/ln(2))
5858 MetricT metric{}; // /< Distance metric (operates on float spans)
59+ bool clamp_negative_distances = true ; // /< Clamp negative distances to 0 (safe for L2/cosine)
5960
6061 /// Create config optimized for high recall on large corpora
6162 /// @param corpus_size Expected number of vectors
@@ -812,7 +813,8 @@ template <concepts::VectorElement StorageT, typename MetricT> class HNSWIndex {
812813 size_t greedy_search_layer_locked (std::span<const float > query, size_t entry_point,
813814 size_t layer, const FilterFn* filter) const {
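814815 // Small negative tolerance for floating-point error in the distance calculation; when config_.clamp_negative_distances is false the threshold is the lowest float, i.e. effectively disabled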
815- constexpr float kDistanceEpsilon = -1e-5f ;
816+ const float kDistanceEpsilon =
817+ config_.clamp_negative_distances ? -1e-5f : std::numeric_limits<float >::lowest ();
816818
817819 size_t current = entry_point;
818820 float current_dist = distance (query, current);
@@ -903,9 +905,12 @@ template <concepts::VectorElement StorageT, typename MetricT> class HNSWIndex {
903905 // Note: entry_dist can be slightly negative due to floating-point error
904906 // (e.g., cosine distance of identical vectors = 1 - 1.0000001 = -1e-7)
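905907 // We allow small negative values to avoid missing exact matches; when config_.clamp_negative_distances is false, any finite distance is accepted and stored unclamped.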
906- constexpr float kDistanceEpsilon = -1e-5f ;
908+ const float kDistanceEpsilon =
909+ config_.clamp_negative_distances ? -1e-5f : std::numeric_limits<float >::lowest ();
907910 if (passes_filter (entry_point) && entry_dist >= kDistanceEpsilon ) {
908- top_candidates.emplace (std::max (0 .0f , entry_dist), entry_point);
911+ float entry_score =
912+ config_.clamp_negative_distances ? std::max (0 .0f , entry_dist) : entry_dist;
913+ top_candidates.emplace (entry_score, entry_point);
909914 }
910915
911916 while (!candidates.empty ()) {
@@ -958,12 +963,18 @@ template <concepts::VectorElement StorageT, typename MetricT> class HNSWIndex {
958963 neighbor_dist < top_candidates.top ().first ;
959964
960965 if (should_explore) {
961- candidates.emplace (std::max (0 .0f , neighbor_dist), neighbor);
966+ float candidate_score = config_.clamp_negative_distances
967+ ? std::max (0 .0f , neighbor_dist)
968+ : neighbor_dist;
969+ candidates.emplace (candidate_score, neighbor);
962970 }
963971
964972 if (passes_filter (neighbor)) {
965973 if (top_candidates.size () < ef || neighbor_dist < top_candidates.top ().first ) {
966- top_candidates.emplace (std::max (0 .0f , neighbor_dist), neighbor);
974+ float top_score = config_.clamp_negative_distances
975+ ? std::max (0 .0f , neighbor_dist)
976+ : neighbor_dist;
977+ top_candidates.emplace (top_score, neighbor);
967978 if (top_candidates.size () > ef) {
968979 top_candidates.pop ();
969980 }
0 commit comments