From 14a4b27e6e7cc5855aed81eee62c71444f812987 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Tue, 2 Jun 2026 15:30:30 -0400
Subject: [PATCH 01/10] Update Fern and reorganize guide navigation

---
 fern/build_docs.sh                            | 14 +++++++++
 fern/docs.yml                                 | 30 ++++++++++++++-----
 fern/fern.config.json                         |  2 +-
 fern/pages/advanced_topics.md                 | 12 ++++----
 fern/pages/api_guide.md                       |  2 +-
 fern/pages/c_guidelines.md                    |  4 +--
 fern/pages/cpp_guidelines.md                  |  2 +-
 fern/pages/developer_guide.md                 |  8 ++---
 fern/pages/java_guidelines.md                 |  2 +-
 fern/pages/jit_compilation.md                 |  6 ++--
 fern/pages/python_guidelines.md               |  2 +-
 fern/pages/udf_usage.md                       |  4 +--
 fern/pages/user_guide.md                      |  8 ++---
 fern/pages/user_guide/integration_patterns.md |  2 +-
 14 files changed, 63 insertions(+), 35 deletions(-)

diff --git a/fern/build_docs.sh b/fern/build_docs.sh
index 3dd001814b..58613e5ae6 100755
--- a/fern/build_docs.sh
+++ b/fern/build_docs.sh
@@ -52,8 +52,22 @@ require_node_22() {
 
 require_node_22
 
+fern_config_version() {  # Print the "version" value from fern.config.json on stdout.
+  python3 - "${SCRIPT_DIR}/fern.config.json" <<'PY'
+import json
+import sys
+
+with open(sys.argv[1], encoding="utf-8") as f:  # sys.argv[1] is the config path given on the python3 command line
+    print(json.load(f).get("version", "*"))  # falls back to "*" when the config has no "version" key
+PY
+}
+
+FERN_VERSION="$(fern_config_version)"
+
 if [[ -n "${FERN_CLI:-}" ]]; then
   FERN_CMD=("${FERN_CLI}")
+elif [[ "${FERN_VERSION}" != "*" ]]; then
+  FERN_CMD=("npx" "--yes" "fern-api@${FERN_VERSION}")
 elif command -v fern >/dev/null 2>&1; then
   FERN_CMD=("fern")
 else
diff --git a/fern/docs.yml b/fern/docs.yml
index 6a7260306d..52fba65ceb 100644
--- a/fern/docs.yml
+++ b/fern/docs.yml
@@ -30,6 +30,20 @@ redirects:
     destination: "/cuvs/developer-guide/coding-guidelines/cpp-guidelines"
   - source: "/cuvs/developer-guide/guidelines/python-guidelines"
     destination: "/cuvs/developer-guide/coding-guidelines/python-guidelines"
+  - source: "/cuvs/user-guide/compatibility"
+    destination: "/cuvs/user-guide/field-guide/compatibility"
+  - source: "/cuvs/user-guide/advanced-topics"
+    destination: "/cuvs/user-guide/field-guide"
+  - source: "/cuvs/user-guide/advanced-topics/jit-compilation"
+    destination: "/cuvs/user-guide/field-guide/jit-compilation"
+  - source: "/cuvs/user-guide/advanced-topics/udf-usage"
+    destination: "/cuvs/user-guide/field-guide/udf-usage"
+  - source: "/cuvs/developer-guide/advanced-topics/jit-compilation"
+    destination: "/cuvs/user-guide/field-guide/jit-compilation"
+  - source: "/cuvs/developer-guide/abi-stability"
+    destination: "/cuvs/developer-guide/advanced-topics/abi-stability"
+  - source: "/cuvs/developer-guide/link-time-optimization"
+    destination: "/cuvs/developer-guide/advanced-topics/link-time-optimization"
 layout:
   searchbar-placement: "header"
   page-width: "1376px"
@@ -206,13 +220,13 @@ navigation:
               - page: "Wiki-all Dataset"
                 hidden: true
                 path: "./pages/cuvs_bench/wiki_all_dataset.md"
-      - page: "Compatibility"
-        path: "./pages/user_guide/abi_stability.md"
       - page: "Integration Patterns"
         path: "./pages/user_guide/integration_patterns.md"
-      - section: "Advanced Topics"
+      - section: "Field Guide"
         path: "./pages/advanced_topics.md"
         contents:
+          - page: "Compatibility"
+            path: "./pages/user_guide/abi_stability.md"
           - page: "JIT Compilation"
             path: "./pages/jit_compilation.md"
           - page: "UDF Usage"
@@ -233,10 +247,12 @@ navigation:
             path: "./pages/java_guidelines.md"
           - page: "Python Guidelines"
             path: "./pages/python_guidelines.md"
-      - page: "ABI Stability"
-        path: "./developer_guide/abi_stability.md"
-      - page: "Link-time Optimization"
-        path: "./pages/jit_lto_guide.md"
+      - section: "Advanced Topics"
+        contents:
+          - page: "ABI Stability"
+            path: "./developer_guide/abi_stability.md"
+          - page: "Link-time Optimization"
+            path: "./pages/jit_lto_guide.md"
       - page: "Contributing"
         path: "./pages/contributing.md"
   - section: "API Reference"
diff --git a/fern/fern.config.json b/fern/fern.config.json
index 08c703bf54..5495dca9a7 100644
--- a/fern/fern.config.json
+++ b/fern/fern.config.json
@@ -1,4 +1,4 @@
 {
   "organization": "nvidia",
-  "version": "*"
+  "version": "5.44.3"
 }
diff --git a/fern/pages/advanced_topics.md b/fern/pages/advanced_topics.md
index 8d96e3752d..081e9e335a 100644
--- a/fern/pages/advanced_topics.md
+++ b/fern/pages/advanced_topics.md
@@ -1,13 +1,13 @@
 ---
-slug: user-guide/advanced-topics
+slug: user-guide/field-guide
 ---
 
-# Advanced Topics
+# Field Guide
 
-Use these pages when working on specialized NVIDIA cuVS development topics that need lower-level implementation context, runtime behavior, or platform-specific guidance.
+Use these pages when working with NVIDIA cuVS compatibility, runtime compilation, and specialized extension points that need a little extra operational context.
 
 ## Topic Guides
 
-- [ABI Stability](/developer-guide/abi-stability): understand ABI expectations for developer-facing APIs and downstream integrations.
-- [JIT Compilation](/developer-guide/advanced-topics/jit-compilation): understand when NVIDIA cuVS triggers just-in-time compilation, how caches behave, and how to warm up JIT-compiled kernels.
-- [Link-time Optimization](/developer-guide/link-time-optimization): use JIT LTO for CUDA compilation, fragment generation, and runtime linking workflows.
+- [Compatibility](/user-guide/field-guide/compatibility): understand NVIDIA cuVS release compatibility, ABI windows, and stable binary boundaries.
+- [JIT Compilation](/user-guide/field-guide/jit-compilation): understand when NVIDIA cuVS triggers just-in-time compilation, how caches behave, and how to warm up JIT-compiled kernels.
+- [UDF Usage](/user-guide/field-guide/udf-usage): supply custom CUDA distance metrics for IVF-flat search.
diff --git a/fern/pages/api_guide.md b/fern/pages/api_guide.md
index 37ceb1cc89..411d8c02c8 100644
--- a/fern/pages/api_guide.md
+++ b/fern/pages/api_guide.md
@@ -6,7 +6,7 @@ slug: user-guide/api-guides
 
 Use these pages to find task-focused NVIDIA cuVS API examples for clustering, vector indexing, preprocessing, and supporting routines.
 
-NVIDIA cuVS is written in C++ at its core and wrapped by a stable C API layer. The Python, Java, Rust, and Go bindings use that C layer so they can share the same ABI boundary; see [Compatibility](/user-guide/compatibility) for why that matters. These API guides are intended for general use and include examples for supported programming languages where possible, but some guides document C++ concepts explicitly because all NVIDIA cuVS algorithm implementations are C++ at the core.
+NVIDIA cuVS is written in C++ at its core and wrapped by a stable C API layer. The Python, Java, Rust, and Go bindings use that C layer so they can share the same ABI boundary; see [Compatibility](/user-guide/field-guide/compatibility) for why that matters. These API guides are intended for general use and include examples for supported programming languages where possible, but some guides document C++ concepts explicitly because all NVIDIA cuVS algorithm implementations are C++ at the core.
 
 ## Common Types
 
diff --git a/fern/pages/c_guidelines.md b/fern/pages/c_guidelines.md
index d040f177d1..8e4426e13c 100644
--- a/fern/pages/c_guidelines.md
+++ b/fern/pages/c_guidelines.md
@@ -29,7 +29,7 @@ Prefer explicit create and destroy functions for every opaque object that owns m
 
 ### API Stability
 
-The C API is the stable boundary used by downstream integrations and NVIDIA cuVS language bindings. Add new functions or fields before removing old ones, avoid changing the meaning of existing parameters, and keep [ABI compatibility](/developer-guide/abi-stability) in mind when changing public structs or exported symbols.
+The C API is the stable boundary used by downstream integrations and NVIDIA cuVS language bindings. Add new functions or fields before removing old ones, avoid changing the meaning of existing parameters, and keep [ABI compatibility](/developer-guide/advanced-topics/abi-stability) in mind when changing public structs or exported symbols.
 
 ### Stateless C APIs
 
@@ -131,7 +131,7 @@ Single-GPU C APIs should not require communication libraries or multi-GPU setup.
 
 C APIs may call implementations that use JIT link-time optimization, but the C wrapper should not duplicate JIT LTO policy or expose C++ implementation details. Keep runtime behavior documented at the API level when JIT compilation can affect first-call latency or cache behavior.
 
-For runtime and cache behavior, see [JIT Compilation](/developer-guide/advanced-topics/jit-compilation). For implementation guidance, see [Link-time Optimization](/developer-guide/link-time-optimization).
+For runtime and cache behavior, see [JIT Compilation](/user-guide/field-guide/jit-compilation). For implementation guidance, see [Link-time Optimization](/developer-guide/advanced-topics/link-time-optimization).
 
 ## Coding Style
 
diff --git a/fern/pages/cpp_guidelines.md b/fern/pages/cpp_guidelines.md
index d96e786897..218309843a 100644
--- a/fern/pages/cpp_guidelines.md
+++ b/fern/pages/cpp_guidelines.md
@@ -256,7 +256,7 @@ void foo(raft::resources const& res)
 
 NVIDIA cuVS is moving new kernels toward JIT link-time optimization. Instead of compiling every kernel variant into the binary, JIT LTO compiles fragments and links the needed combination at runtime.
 
-This helps reduce binary size and enables user-defined functions in NVIDIA cuVS CUDA kernels. For runtime and cache behavior, see [JIT Compilation](/developer-guide/advanced-topics/jit-compilation). For implementation guidance, see [Link-time Optimization](/developer-guide/link-time-optimization).
+This helps reduce binary size and enables user-defined functions in NVIDIA cuVS CUDA kernels. For runtime and cache behavior, see [JIT Compilation](/user-guide/field-guide/jit-compilation). For implementation guidance, see [Link-time Optimization](/developer-guide/advanced-topics/link-time-optimization).
 
 ## Coding Style
 
diff --git a/fern/pages/developer_guide.md b/fern/pages/developer_guide.md
index a72aa09881..0691b17bc8 100644
--- a/fern/pages/developer_guide.md
+++ b/fern/pages/developer_guide.md
@@ -14,12 +14,10 @@ Use these pages when contributing to NVIDIA cuVS or working on integrations that
 - [Java Guidelines](/developer-guide/coding-guidelines/java-guidelines): follow NVIDIA cuVS Java API design, native binding, packaging, and resource-management conventions.
 - [Python Guidelines](/developer-guide/coding-guidelines/python-guidelines): follow NVIDIA cuVS Python API, packaging, and binding conventions.
 
-## ABI Stability
+## Advanced Topics
 
-- [Advanced Topics](/user-guide/advanced-topics): find specialized development topics and low-level implementation guidance.
-- [ABI Stability](/developer-guide/abi-stability): understand ABI expectations for developer-facing APIs and downstream integrations.
-- [JIT Compilation](/developer-guide/advanced-topics/jit-compilation): understand when NVIDIA cuVS triggers just-in-time compilation and how runtime caches behave.
-- [Link-time Optimization](/developer-guide/link-time-optimization): use JIT LTO for CUDA compilation and linking workflows.
+- [ABI Stability](/developer-guide/advanced-topics/abi-stability): understand ABI expectations for developer-facing APIs and downstream integrations.
+- [Link-time Optimization](/developer-guide/advanced-topics/link-time-optimization): use JIT LTO for CUDA compilation and linking workflows.
 
 ## Contributing
 
diff --git a/fern/pages/java_guidelines.md b/fern/pages/java_guidelines.md
index 1865e4b737..4c535562f0 100644
--- a/fern/pages/java_guidelines.md
+++ b/fern/pages/java_guidelines.md
@@ -33,7 +33,7 @@ Keep generated Panama bindings and native `MemorySegment` details out of the pub
 
 Java APIs are consumed by downstream applications and should change carefully. Add new methods or overloads before removing old ones, preserve existing builder behavior where possible, and avoid changing defaults in ways that silently alter search quality, memory use, or native resource ownership.
 
-The Java bindings should call the NVIDIA cuVS C APIs rather than C++ or CUDA implementation details directly. The C layer is the ABI-stable boundary for bindings, so changes that require new native behavior should usually start with the C API. See [ABI Stability](/developer-guide/abi-stability) for more detail.
+The Java bindings should call the NVIDIA cuVS C APIs rather than C++ or CUDA implementation details directly. The C layer is the ABI-stable boundary for bindings, so changes that require new native behavior should usually start with the C API. See [ABI Stability](/developer-guide/advanced-topics/abi-stability) for more detail.
 
 ### Resource Lifecycle
 
diff --git a/fern/pages/jit_compilation.md b/fern/pages/jit_compilation.md
index 7a294a87b1..e3c210788f 100644
--- a/fern/pages/jit_compilation.md
+++ b/fern/pages/jit_compilation.md
@@ -1,5 +1,5 @@
 ---
-slug: developer-guide/advanced-topics/jit-compilation
+slug: user-guide/field-guide/jit-compilation
 ---
 
 # JIT Compilation
@@ -17,6 +17,6 @@ The following NVIDIA cuVS capabilities currently trigger JIT compilation:
 
 - IVF-Flat search APIs: [cuvs::neighbors::ivf_flat::search()](/api-reference/cpp-api-neighbors-ivf-flat)
 
-Custom distance metrics (UDFs) for IVF-flat search also use JIT compilation. See [UDF Usage](udf_usage.md).
+Custom distance metrics (UDFs) for IVF-flat search also use JIT compilation. See [UDF Usage](/user-guide/field-guide/udf-usage).
 
-For implementation details on building JIT LTO kernel fragments and linking them at runtime, see [Link-time Optimization](jit_lto_guide.md).
+For implementation details on building JIT LTO kernel fragments and linking them at runtime, see [Link-time Optimization](/developer-guide/advanced-topics/link-time-optimization).
diff --git a/fern/pages/python_guidelines.md b/fern/pages/python_guidelines.md
index 85e800f683..570ce7d4a4 100644
--- a/fern/pages/python_guidelines.md
+++ b/fern/pages/python_guidelines.md
@@ -16,7 +16,7 @@ If source builds are not being used, install the local RAFT Python artifacts int
 
 ### Bindings
 
-Python APIs, like all other NVIDIA cuVS language bindings, should wrap the C APIs and should not call C++ or CUDA implementation code directly. The C layer is the ABI-stable boundary for bindings, so ABI compatibility needs to be maintained there. See [ABI Stability](/developer-guide/abi-stability) for more detail.
+Python APIs, like all other NVIDIA cuVS language bindings, should wrap the C APIs and should not call C++ or CUDA implementation code directly. The C layer is the ABI-stable boundary for bindings, so ABI compatibility needs to be maintained there. See [ABI Stability](/developer-guide/advanced-topics/abi-stability) for more detail.
 
 Keep Cython bindings focused on translating Python inputs into NVIDIA cuVS calls and returning Python-friendly outputs. Heavy algorithmic work should stay in C++ or CUDA implementation code.
 
diff --git a/fern/pages/udf_usage.md b/fern/pages/udf_usage.md
index 137add871c..1b238fb63c 100644
--- a/fern/pages/udf_usage.md
+++ b/fern/pages/udf_usage.md
@@ -4,7 +4,7 @@
 
 ## What this feature does
 
-You can supply **your own CUDA device code** that defines how distance accumulates between a query vector and database vectors **inside the IVF-flat interleaved scan** (the fine search over lists). Technical background on compilation and linking is in [Link-time Optimization](jit_lto_guide.md).
+You can supply **your own CUDA device code** that defines how distance accumulates between a query vector and database vectors **inside the IVF-flat interleaved scan** (the fine search over lists). Technical background on compilation and linking is in [Link-time Optimization](/developer-guide/advanced-topics/link-time-optimization).
 
 ## Available via C++ APIs
 
@@ -63,4 +63,4 @@ More examples: `cpp/tests/neighbors/ann_ivf_flat/test_udf.cu`.
 ## Further reading
 
 - C++ API reference: [neighbors::ivf_flat](/api-reference/cpp-api-neighbors-ivf-flat)
-- JIT LTO architecture and IVF-flat fragments: [Link-time Optimization](jit_lto_guide.md)
+- JIT LTO architecture and IVF-flat fragments: [Link-time Optimization](/developer-guide/advanced-topics/link-time-optimization)
diff --git a/fern/pages/user_guide.md b/fern/pages/user_guide.md
index fc1f99f16a..3047d92d30 100644
--- a/fern/pages/user_guide.md
+++ b/fern/pages/user_guide.md
@@ -60,14 +60,14 @@ Use these guides when you are ready to apply NVIDIA cuVS APIs, benchmark algorit
 - [cuVS Bench Datasets](/user-guide/benchmarking-guide/cu-vs-bench-tool/datasets): prepare datasets, ground truth, binary files, and dataset descriptors.
 - [cuVS Bench Backends](/user-guide/benchmarking-guide/cu-vs-bench-tool/backends): understand and extend backend integrations for benchmark execution.
 
-## Compatibility and Integration
+## Integration
 
-- [Compatibility](user_guide/abi_stability.md): understand cuVS release compatibility, ABI windows, and stable binary boundaries.
 - [Integration Patterns](user_guide/integration_patterns.md): compare direct, offloaded, and service-oriented ways to integrate cuVS into products.
 
-## Advanced Topics
+## Field Guide
 
-- [Advanced Topics](advanced_topics.md): find specialized usage topics and low-level implementation guidance.
+- [Field Guide](advanced_topics.md): find compatibility, runtime compilation, and specialized extension topics.
+- [Compatibility](user_guide/abi_stability.md): understand cuVS release compatibility, ABI windows, and stable binary boundaries.
 - [JIT Compilation](jit_compilation.md): understand when cuVS triggers just-in-time compilation and how runtime caches behave.
 - [UDF Usage](udf_usage.md): supply custom CUDA distance metrics for IVF-flat search (C++ only, experimental).
 
diff --git a/fern/pages/user_guide/integration_patterns.md b/fern/pages/user_guide/integration_patterns.md
index e06a53d980..f6e4348e9a 100644
--- a/fern/pages/user_guide/integration_patterns.md
+++ b/fern/pages/user_guide/integration_patterns.md
@@ -32,7 +32,7 @@ Products that integrate at the binary level should prefer the NVIDIA cuVS C APIs
 
 ABI stability is especially useful for databases, search engines, language bindings, and packaged applications. It allows vendors to build against one compatible NVIDIA cuVS release while giving users or package managers flexibility to install a newer runtime from the same ABI compatibility window.
 
-For compatibility rules, release windows, and shared library naming, see [Compatibility](/user-guide/compatibility).
+For compatibility rules, release windows, and shared library naming, see [Compatibility](/user-guide/field-guide/compatibility).
 
 ## Choosing a pattern
 

From 8feffa0ce88fd3d864872a7ffda339ccf603f3fc Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Tue, 2 Jun 2026 16:43:28 -0400
Subject: [PATCH 02/10] Move integration patterns under field guide

---
 fern/docs.yml                                 | 6 ++++--
 fern/pages/advanced_topics.md                 | 1 +
 fern/pages/user_guide.md                      | 5 +----
 fern/pages/user_guide/integration_patterns.md | 4 ++++
 fern/pages/what_is_vector_search.md           | 2 +-
 5 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/fern/docs.yml b/fern/docs.yml
index 52fba65ceb..196b8a441d 100644
--- a/fern/docs.yml
+++ b/fern/docs.yml
@@ -32,6 +32,8 @@ redirects:
     destination: "/cuvs/developer-guide/coding-guidelines/python-guidelines"
   - source: "/cuvs/user-guide/compatibility"
     destination: "/cuvs/user-guide/field-guide/compatibility"
+  - source: "/cuvs/user-guide/integration-patterns"
+    destination: "/cuvs/user-guide/field-guide/integration-patterns"
   - source: "/cuvs/user-guide/advanced-topics"
     destination: "/cuvs/user-guide/field-guide"
   - source: "/cuvs/user-guide/advanced-topics/jit-compilation"
@@ -220,11 +222,11 @@ navigation:
               - page: "Wiki-all Dataset"
                 hidden: true
                 path: "./pages/cuvs_bench/wiki_all_dataset.md"
-      - page: "Integration Patterns"
-        path: "./pages/user_guide/integration_patterns.md"
       - section: "Field Guide"
         path: "./pages/advanced_topics.md"
         contents:
+          - page: "Integration Patterns"
+            path: "./pages/user_guide/integration_patterns.md"
           - page: "Compatibility"
             path: "./pages/user_guide/abi_stability.md"
           - page: "JIT Compilation"
diff --git a/fern/pages/advanced_topics.md b/fern/pages/advanced_topics.md
index 081e9e335a..ce700036a8 100644
--- a/fern/pages/advanced_topics.md
+++ b/fern/pages/advanced_topics.md
@@ -8,6 +8,7 @@ Use these pages when working with NVIDIA cuVS compatibility, runtime compilation
 
 ## Topic Guides
 
+- [Integration Patterns](/user-guide/field-guide/integration-patterns): compare direct, offloaded, and service-oriented ways to integrate NVIDIA cuVS into products.
 - [Compatibility](/user-guide/field-guide/compatibility): understand NVIDIA cuVS release compatibility, ABI windows, and stable binary boundaries.
 - [JIT Compilation](/user-guide/field-guide/jit-compilation): understand when NVIDIA cuVS triggers just-in-time compilation, how caches behave, and how to warm up JIT-compiled kernels.
 - [UDF Usage](/user-guide/field-guide/udf-usage): supply custom CUDA distance metrics for IVF-flat search.
diff --git a/fern/pages/user_guide.md b/fern/pages/user_guide.md
index 3047d92d30..9677cda564 100644
--- a/fern/pages/user_guide.md
+++ b/fern/pages/user_guide.md
@@ -60,13 +60,10 @@ Use these guides when you are ready to apply NVIDIA cuVS APIs, benchmark algorit
 - [cuVS Bench Datasets](/user-guide/benchmarking-guide/cu-vs-bench-tool/datasets): prepare datasets, ground truth, binary files, and dataset descriptors.
 - [cuVS Bench Backends](/user-guide/benchmarking-guide/cu-vs-bench-tool/backends): understand and extend backend integrations for benchmark execution.
 
-## Integration
-
-- [Integration Patterns](user_guide/integration_patterns.md): compare direct, offloaded, and service-oriented ways to integrate cuVS into products.
-
 ## Field Guide
 
 - [Field Guide](advanced_topics.md): find compatibility, runtime compilation, and specialized extension topics.
+- [Integration Patterns](/user-guide/field-guide/integration-patterns): compare direct, offloaded, and service-oriented ways to integrate cuVS into products.
 - [Compatibility](user_guide/abi_stability.md): understand cuVS release compatibility, ABI windows, and stable binary boundaries.
 - [JIT Compilation](jit_compilation.md): understand when cuVS triggers just-in-time compilation and how runtime caches behave.
 - [UDF Usage](udf_usage.md): supply custom CUDA distance metrics for IVF-flat search (C++ only, experimental).
diff --git a/fern/pages/user_guide/integration_patterns.md b/fern/pages/user_guide/integration_patterns.md
index f6e4348e9a..af8f3aabca 100644
--- a/fern/pages/user_guide/integration_patterns.md
+++ b/fern/pages/user_guide/integration_patterns.md
@@ -1,3 +1,7 @@
+---
+slug: user-guide/field-guide/integration-patterns
+---
+
 # Integration Patterns
 
 NVIDIA cuVS is used in several different ways across vector databases, search engines, data platforms, and application libraries. Some products call NVIDIA cuVS directly inside the same process. Others offload expensive index builds to a separate service, container, or serverless worker, then load the resulting index back into the serving system.
diff --git a/fern/pages/what_is_vector_search.md b/fern/pages/what_is_vector_search.md
index bb76d447b9..1fb5c96360 100644
--- a/fern/pages/what_is_vector_search.md
+++ b/fern/pages/what_is_vector_search.md
@@ -120,7 +120,7 @@ GPU acceleration can help across the vector-search pipeline, not only during que
 
 Some systems use GPUs only where they are most valuable. For example, a graph index can be built quickly on the GPU and converted to a CPU-searchable format such as HNSW, or a database can offload index builds to a GPU worker while keeping serving in its existing CPU runtime. These hybrid patterns let products shorten ingest or rebuild time without requiring every query-serving node to have a GPU.
 
-For more integration details, see [Hybrid GPU-build and CPU-search](/user-guide/integration-patterns#hybrid-gpu-build-and-cpu-search) and [Offloaded index builds](/user-guide/integration-patterns#offloaded-index-builds).
+For more integration details, see [Hybrid GPU-build and CPU-search](/user-guide/field-guide/integration-patterns#hybrid-gpu-build-and-cpu-search) and [Offloaded index builds](/user-guide/field-guide/integration-patterns#offloaded-index-builds).
 
 ## Choosing Index Types
 

From 066d806cc09b829c9eff1becf729069337bdcb3a Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Tue, 2 Jun 2026 17:28:06 -0400
Subject: [PATCH 03/10] Update field guide docs

---
 fern/docs.yml                 |  4 ++--
 fern/pages/advanced_topics.md |  4 ++--
 fern/pages/jit_compilation.md | 19 +++++++++++++++----
 3 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/fern/docs.yml b/fern/docs.yml
index 196b8a441d..932b4eaf94 100644
--- a/fern/docs.yml
+++ b/fern/docs.yml
@@ -225,10 +225,10 @@ navigation:
       - section: "Field Guide"
         path: "./pages/advanced_topics.md"
         contents:
-          - page: "Integration Patterns"
-            path: "./pages/user_guide/integration_patterns.md"
           - page: "Compatibility"
             path: "./pages/user_guide/abi_stability.md"
+          - page: "Integration Patterns"
+            path: "./pages/user_guide/integration_patterns.md"
           - page: "JIT Compilation"
             path: "./pages/jit_compilation.md"
           - page: "UDF Usage"
diff --git a/fern/pages/advanced_topics.md b/fern/pages/advanced_topics.md
index ce700036a8..28ead9da77 100644
--- a/fern/pages/advanced_topics.md
+++ b/fern/pages/advanced_topics.md
@@ -4,11 +4,11 @@ slug: user-guide/field-guide
 
 # Field Guide
 
-Use these pages when working with NVIDIA cuVS compatibility, runtime compilation, and specialized extension points that need a little extra operational context.
+The Field Guide collects practical material for users integrating NVIDIA cuVS into libraries, databases, and production systems, as well as users running NVIDIA cuVS directly in standalone workflows. These pages cover compatibility, integration patterns, runtime compilation, and extension points that are useful once you move beyond the basic API guides.
 
 ## Topic Guides
 
-- [Integration Patterns](/user-guide/field-guide/integration-patterns): compare direct, offloaded, and service-oriented ways to integrate NVIDIA cuVS into products.
 - [Compatibility](/user-guide/field-guide/compatibility): understand NVIDIA cuVS release compatibility, ABI windows, and stable binary boundaries.
+- [Integration Patterns](/user-guide/field-guide/integration-patterns): compare direct, offloaded, and service-oriented ways to integrate NVIDIA cuVS into products.
 - [JIT Compilation](/user-guide/field-guide/jit-compilation): understand when NVIDIA cuVS triggers just-in-time compilation, how caches behave, and how to warm up JIT-compiled kernels.
 - [UDF Usage](/user-guide/field-guide/udf-usage): supply custom CUDA distance metrics for IVF-flat search.
diff --git a/fern/pages/jit_compilation.md b/fern/pages/jit_compilation.md
index e3c210788f..32ed507559 100644
--- a/fern/pages/jit_compilation.md
+++ b/fern/pages/jit_compilation.md
@@ -8,14 +8,25 @@ NVIDIA cuVS uses Just-in-Time (JIT) [Link-Time Optimization (LTO)](https://devel
 
 The cache validity is:
 
-1. In-memory cache is valid for the lifetime of the process.
-2. On-disk cache is valid until a CUDA driver upgrade is performed. The cache can be shared between machines through network or cloud storage, and we recommend storing it in a persistent location. For more details on configuring the on-disk cache, see the CUDA documentation on [JIT Compilation](https://docs.nvidia.com/cuda/cuda-programming-guide/05-appendices/environment-variables.html#jit-compilation). The most relevant environment variables are `CUDA_CACHE_PATH` and `CUDA_CACHE_MAX_SIZE`.
+1. In-memory cache: lifetime of the process.
+2. On-disk cache: until a CUDA driver upgrade is performed. The cache can be shared between machines through network or cloud storage, and we recommend storing it in a persistent location. For more details on configuring the on-disk cache, see the CUDA documentation on [JIT Compilation](https://docs.nvidia.com/cuda/cuda-programming-guide/05-appendices/environment-variables.html#jit-compilation). The most relevant environment variables are `CUDA_CACHE_PATH` and `CUDA_CACHE_MAX_SIZE`.
 
 JIT compilation is a one-time cost for a given kernel configuration. After the first compilation, you should not expect a steady-state performance loss. For latency-sensitive workflows, run a warmup step before the actual workload so the relevant kernels are compiled and cached ahead of time.
 
-The following NVIDIA cuVS capabilities currently trigger JIT compilation:
+The following public NVIDIA cuVS C++ APIs currently trigger JIT compilation. Each search entry covers its single-GPU overloads and, where they are exposed, its multi-GPU overloads.
 
-- IVF-Flat search APIs: [cuvs::neighbors::ivf_flat::search()](/api-reference/cpp-api-neighbors-ivf-flat)
+- [cuvs::neighbors::cagra::search()](/api-reference/cpp-api-neighbors-cagra)
+- [cuvs::neighbors::ivf_flat::search()](/api-reference/cpp-api-neighbors-ivf-flat)
+- [cuvs::neighbors::ivf_pq::search()](/api-reference/cpp-api-neighbors-ivf-pq)
+- [cuvs::neighbors::ivf_sq::search()](/api-reference/cpp-api-neighbors-ivf-sq)
+
+The following C++ APIs can also trigger JIT compilation when they call one of the search paths above internally:
+
+- [cuvs::neighbors::cagra::build()](/api-reference/cpp-api-neighbors-cagra) when graph construction uses `graph_build_params::ivf_pq_params` or `graph_build_params::iterative_search_params`
+- [cuvs::neighbors::cagra::extend()](/api-reference/cpp-api-neighbors-cagra) when adding nodes, because the extension path searches the existing CAGRA graph
+- [cuvs::neighbors::composite::composite_index::search()](/api-reference/cpp-api-neighbors-composite-index) when the composite index searches its CAGRA child indexes
+- [cuvs::neighbors::tiered_index::search()](/api-reference/cpp-api-neighbors-tiered-index) when the tiered index is backed by CAGRA, IVF-Flat, or IVF-PQ
+- [cuvs::neighbors::all_neighbors::build()](/api-reference/cpp-api-neighbors-all-neighbors) when `graph_build_params` uses IVF-PQ
 
 Custom distance metrics (UDFs) for IVF-flat search also use JIT compilation. See [UDF Usage](/user-guide/field-guide/udf-usage).
 

From 6ff921edd7280b897dbafc4e0af79b0d04148660 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Tue, 2 Jun 2026 18:11:37 -0400
Subject: [PATCH 04/10] Update fern/pages/advanced_topics.md

Co-authored-by: Divye Gala <divyegala@gmail.com>
---
 fern/pages/advanced_topics.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fern/pages/advanced_topics.md b/fern/pages/advanced_topics.md
index 28ead9da77..046dcb6858 100644
--- a/fern/pages/advanced_topics.md
+++ b/fern/pages/advanced_topics.md
@@ -11,4 +11,4 @@ The Field Guide collects practical material for users integrating NVIDIA cuVS in
 - [Compatibility](/user-guide/field-guide/compatibility): understand NVIDIA cuVS release compatibility, ABI windows, and stable binary boundaries.
 - [Integration Patterns](/user-guide/field-guide/integration-patterns): compare direct, offloaded, and service-oriented ways to integrate NVIDIA cuVS into products.
 - [JIT Compilation](/user-guide/field-guide/jit-compilation): understand when NVIDIA cuVS triggers just-in-time compilation, how caches behave, and how to warm up JIT-compiled kernels.
-- [UDF Usage](/user-guide/field-guide/udf-usage): supply custom CUDA distance metrics for IVF-flat search.
+- [UDF Usage](/user-guide/field-guide/udf-usage): supply custom CUDA user-defined functions (UDFs) to NVIDIA cuVS APIs.

From 729e9d23091eede1a8599d18fcd01de8ab2bbb86 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Tue, 2 Jun 2026 18:24:15 -0400
Subject: [PATCH 05/10] Fix Fern API reference formatting

---
 fern/docs.yml                                 |   2 +-
 fern/pages/c_api/c-api-cluster-kmeans.md      |  40 +--
 fern/pages/c_api/c-api-core-c-api.md          |  92 +++----
 .../c_api/c-api-distance-pairwise-distance.md |   4 +-
 .../c_api/c-api-neighbors-all-neighbors.md    |  12 +-
 .../c_api/c-api-neighbors-brute-force.md      |  35 +--
 fern/pages/c_api/c-api-neighbors-cagra.md     | 126 +++++-----
 fern/pages/c_api/c-api-neighbors-hnsw.md      |  75 +++---
 fern/pages/c_api/c-api-neighbors-ivf-flat.md  |  68 ++---
 fern/pages/c_api/c-api-neighbors-ivf-pq.md    | 121 ++++-----
 fern/pages/c_api/c-api-neighbors-ivf-sq.md    |  71 +++---
 fern/pages/c_api/c-api-neighbors-mg-cagra.md  |  48 ++--
 .../c_api/c-api-neighbors-mg-ivf-flat.md      |  48 ++--
 fern/pages/c_api/c-api-neighbors-mg-ivf-pq.md |  48 ++--
 .../pages/c_api/c-api-neighbors-nn-descent.md |  41 +--
 fern/pages/c_api/c-api-neighbors-refine.md    |   4 +-
 .../c_api/c-api-neighbors-tiered-index.md     |  37 +--
 fern/pages/c_api/c-api-neighbors-vamana.md    |  28 +--
 fern/pages/c_api/c-api-preprocessing-pca.md   |  24 +-
 .../c-api-preprocessing-quantize-binary.md    |  28 +--
 .../c_api/c-api-preprocessing-quantize-pq.md  |  52 ++--
 .../c-api-preprocessing-quantize-scalar.md    |  28 +--
 .../cpp_api/cpp-api-cluster-agglomerative.md  |   4 +-
 fern/pages/cpp_api/cpp-api-cluster-kmeans.md  |  26 +-
 .../cpp_api/cpp-api-distance-distance.md      |  16 +-
 .../cpp-api-neighbors-all-neighbors.md        |  12 +-
 .../cpp_api/cpp-api-neighbors-brute-force.md  |   8 +-
 fern/pages/cpp_api/cpp-api-neighbors-cagra.md |  68 ++---
 .../cpp-api-neighbors-dynamic-batching.md     |   4 +-
 .../cpp-api-neighbors-epsilon-neighborhood.md |   2 +-
 fern/pages/cpp_api/cpp-api-neighbors-hnsw.md  |  30 ++-
 .../pages/cpp_api/cpp-api-neighbors-ivf-pq.md |   6 +-
 .../pages/cpp_api/cpp-api-neighbors-ivf-sq.md |  72 +++---
 .../cpp_api/cpp-api-neighbors-nn-descent.md   |  54 ++--
 .../pages/cpp_api/cpp-api-neighbors-refine.md |  20 +-
 .../pages/cpp_api/cpp-api-neighbors-vamana.md |  12 +-
 .../cpp_api/cpp-api-preprocessing-pca.md      |   4 +-
 .../cpp-api-preprocessing-quantize-pq.md      |   4 +-
 .../cpp_api/cpp-api-selection-select-k.md     |  18 +-
 .../cpp_api/cpp-api-stats-silhouette-score.md |   8 +-
 .../cpp-api-stats-trustworthiness-score.md    |   4 +-
 .../{advanced_topics.md => field_guide.md}    |   0
 fern/pages/jit_compilation.md                 |   8 -
 .../python_api/python-api-cluster-kmeans.md   |   2 +-
 fern/pages/python_api/python-api-common.md    |   2 +-
 .../python-api-neighbors-all-neighbors.md     |   2 +-
 .../python_api/python-api-neighbors-cagra.md  |  10 +-
 .../python_api/python-api-neighbors-hnsw.md   |   6 +-
 .../python-api-neighbors-ivf-flat.md          |   2 +-
 .../python_api/python-api-neighbors-ivf-pq.md |  14 +-
 .../python_api/python-api-neighbors-ivf-sq.md |  18 +-
 .../python-api-neighbors-mg-cagra.md          |   2 +-
 .../python-api-neighbors-mg-ivf-flat.md       |   2 +-
 .../python-api-neighbors-mg-ivf-pq.md         |   2 +-
 .../python-api-neighbors-tiered-index.md      |   2 +-
 .../python-api-preprocessing-quantize-pq.md   |   4 +-
 fern/pages/user_guide.md                      |  69 +-----
 fern/scripts/generate_api_reference.py        | 233 +++++++++++++++---
 58 files changed, 981 insertions(+), 801 deletions(-)
 rename fern/pages/{advanced_topics.md => field_guide.md} (100%)

diff --git a/fern/docs.yml b/fern/docs.yml
index 932b4eaf94..adc7034efd 100644
--- a/fern/docs.yml
+++ b/fern/docs.yml
@@ -223,7 +223,7 @@ navigation:
                 hidden: true
                 path: "./pages/cuvs_bench/wiki_all_dataset.md"
       - section: "Field Guide"
-        path: "./pages/advanced_topics.md"
+        path: "./pages/field_guide.md"
         contents:
           - page: "Compatibility"
             path: "./pages/user_guide/abi_stability.md"
diff --git a/fern/pages/c_api/c-api-cluster-kmeans.md b/fern/pages/c_api/c-api-cluster-kmeans.md
index 90d093d140..2af8b259bc 100644
--- a/fern/pages/c_api/c-api-cluster-kmeans.md
+++ b/fern/pages/c_api/c-api-cluster-kmeans.md
@@ -63,7 +63,7 @@ struct cuvsKMeansParams {
 | `tol` | `double` | Relative tolerance with regards to inertia to declare convergence. |
 | `n_init` | `int` | Number of instance k-means algorithm will be run with different seeds. |
 | `oversampling_factor` | `double` | Oversampling factor for use in the k-means\|\| algorithm |
-| `batch_samples` | `int` | batch_samples and batch_centroids are used to tile 1NN computation which is useful to optimize/control the memory footprint Default tile is [batch_samples x n_clusters] i.e. when batch_centroids is 0 then don't tile the centroids |
+| `batch_samples` | `int` | batch_samples and batch_centroids are used to tile 1NN computation which is useful to optimize/control the memory footprint<br />Default tile is [batch_samples x n_clusters] i.e. when batch_centroids is 0 then don't tile the centroids |
 | `batch_centroids` | `int` | if 0 then batch_centroids = n_clusters |
 | `inertia_check` | `bool` | Deprecated, ignored. Kept for ABI compatibility. |
 | `hierarchical` | `bool` | Whether to use hierarchical (balanced) kmeans or not |
@@ -105,7 +105,7 @@ struct cuvsKMeansParams_v2 {
 | `tol` | `double` | Relative tolerance with regards to inertia to declare convergence. |
 | `n_init` | `int` | Number of instance k-means algorithm will be run with different seeds. |
 | `oversampling_factor` | `double` | Oversampling factor for use in the k-means\|\| algorithm |
-| `batch_samples` | `int` | batch_samples and batch_centroids are used to tile 1NN computation which is useful to optimize/control the memory footprint Default tile is [batch_samples x n_clusters] i.e. when batch_centroids is 0 then don't tile the centroids |
+| `batch_samples` | `int` | batch_samples and batch_centroids are used to tile 1NN computation which is useful to optimize/control the memory footprint<br />Default tile is [batch_samples x n_clusters] i.e. when batch_centroids is 0 then don't tile the centroids |
 | `batch_centroids` | `int` | if 0 then batch_centroids = n_clusters |
 | `hierarchical` | `bool` | Whether to use hierarchical (balanced) kmeans or not |
 | `hierarchical_n_iters` | `int` | For hierarchical k-means , defines the number of training iterations |
@@ -119,7 +119,7 @@ struct cuvsKMeansParams_v2 {
 Allocate KMeans params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsKMeansParamsCreate(cuvsKMeansParams_t* params);
+cuvsError_t cuvsKMeansParamsCreate(cuvsKMeansParams_t* params);
 ```
 
 **Note:** In cuVS 26.08 (next ABI major version) this signature will be<br />replaced by cuvsKMeansParamsCreate_v2.
@@ -132,7 +132,7 @@ CUVS_EXPORT cuvsError_t cuvsKMeansParamsCreate(cuvsKMeansParams_t* params);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvskmeansparamsdestroy"></a>
 ### cuvsKMeansParamsDestroy
@@ -140,7 +140,7 @@ CUVS_EXPORT cuvsError_t cuvsKMeansParamsCreate(cuvsKMeansParams_t* params);
 De-allocate KMeans params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsKMeansParamsDestroy(cuvsKMeansParams_t params);
+cuvsError_t cuvsKMeansParamsDestroy(cuvsKMeansParams_t params);
 ```
 
 **Note:** In cuVS 26.08 (next ABI major version) this signature will be<br />replaced by cuvsKMeansParamsDestroy_v2.
@@ -153,7 +153,7 @@ CUVS_EXPORT cuvsError_t cuvsKMeansParamsDestroy(cuvsKMeansParams_t params);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvskmeansparamscreate-v2"></a>
 ### cuvsKMeansParamsCreate_v2
@@ -161,7 +161,7 @@ CUVS_EXPORT cuvsError_t cuvsKMeansParamsDestroy(cuvsKMeansParams_t params);
 Allocate KMeans params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsKMeansParamsCreate_v2(cuvsKMeansParams_v2_t* params);
+cuvsError_t cuvsKMeansParamsCreate_v2(cuvsKMeansParams_v2_t* params);
 ```
 
 Mirrors cuvsKMeansParamsCreate but operates on cuvsKMeansParams_v2. Will become the unsuffixed cuvsKMeansParamsCreate in cuVS 26.08.
@@ -174,7 +174,7 @@ Mirrors cuvsKMeansParamsCreate but operates on cuvsKMeansParams_v2. Will become
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvskmeansparamsdestroy-v2"></a>
 ### cuvsKMeansParamsDestroy_v2
@@ -182,7 +182,7 @@ Mirrors cuvsKMeansParamsCreate but operates on cuvsKMeansParams_v2. Will become
 De-allocate KMeans params allocated by cuvsKMeansParamsCreate_v2.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsKMeansParamsDestroy_v2(cuvsKMeansParams_v2_t params);
+cuvsError_t cuvsKMeansParamsDestroy_v2(cuvsKMeansParams_v2_t params);
 ```
 
 **Parameters**
@@ -193,7 +193,7 @@ CUVS_EXPORT cuvsError_t cuvsKMeansParamsDestroy_v2(cuvsKMeansParams_v2_t params)
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvskmeanstype"></a>
 ### cuvsKMeansType
@@ -222,7 +222,7 @@ typedef enum {
 Find clusters with k-means algorithm.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsKMeansFit(cuvsResources_t res,
+cuvsError_t cuvsKMeansFit(cuvsResources_t res,
 cuvsKMeansParams_t params,
 DLManagedTensor* X,
 DLManagedTensor* sample_weight,
@@ -251,7 +251,7 @@ X may reside on either host (CPU) or device (GPU) memory. When X is on the host
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvskmeansfit-v2"></a>
 ### cuvsKMeansFit_v2
@@ -259,7 +259,7 @@ X may reside on either host (CPU) or device (GPU) memory. When X is on the host
 Find clusters with k-means algorithm (v2 params layout).
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsKMeansFit_v2(cuvsResources_t res,
+cuvsError_t cuvsKMeansFit_v2(cuvsResources_t res,
 cuvsKMeansParams_v2_t params,
 DLManagedTensor* X,
 DLManagedTensor* sample_weight,
@@ -284,7 +284,7 @@ Mirrors cuvsKMeansFit but takes cuvsKMeansParams_v2_t. Will become the unsuffixe
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvskmeanspredict"></a>
 ### cuvsKMeansPredict
@@ -292,7 +292,7 @@ Mirrors cuvsKMeansFit but takes cuvsKMeansParams_v2_t. Will become the unsuffixe
 Predict the closest cluster each sample in X belongs to.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsKMeansPredict(cuvsResources_t res,
+cuvsError_t cuvsKMeansPredict(cuvsResources_t res,
 cuvsKMeansParams_t params,
 DLManagedTensor* X,
 DLManagedTensor* sample_weight,
@@ -319,7 +319,7 @@ double* inertia);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvskmeanspredict-v2"></a>
 ### cuvsKMeansPredict_v2
@@ -327,7 +327,7 @@ double* inertia);
 Predict the closest cluster each sample in X belongs to (v2 params layout).
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsKMeansPredict_v2(cuvsResources_t res,
+cuvsError_t cuvsKMeansPredict_v2(cuvsResources_t res,
 cuvsKMeansParams_v2_t params,
 DLManagedTensor* X,
 DLManagedTensor* sample_weight,
@@ -354,7 +354,7 @@ Mirrors cuvsKMeansPredict but takes cuvsKMeansParams_v2_t. Will become the unsuf
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvskmeansclustercost"></a>
 ### cuvsKMeansClusterCost
@@ -362,7 +362,7 @@ Mirrors cuvsKMeansPredict but takes cuvsKMeansParams_v2_t. Will become the unsuf
 Compute cluster cost
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsKMeansClusterCost(cuvsResources_t res,
+cuvsError_t cuvsKMeansClusterCost(cuvsResources_t res,
 DLManagedTensor* X,
 DLManagedTensor* centroids,
 double* cost);
@@ -379,4 +379,4 @@ double* cost);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
diff --git a/fern/pages/c_api/c-api-core-c-api.md b/fern/pages/c_api/c-api-core-c-api.md
index 21f85540b2..14a14ce46b 100644
--- a/fern/pages/c_api/c-api-core-c-api.md
+++ b/fern/pages/c_api/c-api-core-c-api.md
@@ -33,12 +33,12 @@ typedef enum {
 Returns a string describing the last seen error on this thread, or NULL if the last function succeeded.
 
 ```c
-CUVS_EXPORT const char* cuvsGetLastErrorText();
+const char* cuvsGetLastErrorText();
 ```
 
 **Returns**
 
-`CUVS_EXPORT const char*`
+`const char*`
 
 <a id="cuvssetlasterrortext"></a>
 ### cuvsSetLastErrorText
@@ -46,7 +46,7 @@ CUVS_EXPORT const char* cuvsGetLastErrorText();
 Sets a string describing an error seen on the thread. Passing NULL clears any previously seen error message.
 
 ```c
-CUVS_EXPORT void cuvsSetLastErrorText(const char* error);
+void cuvsSetLastErrorText(const char* error);
 ```
 
 **Parameters**
@@ -57,7 +57,7 @@ CUVS_EXPORT void cuvsSetLastErrorText(const char* error);
 
 **Returns**
 
-`CUVS_EXPORT void`
+`void`
 
 ## cuVS Logging
 
@@ -96,12 +96,12 @@ typedef enum {
 Returns the current log level
 
 ```c
-CUVS_EXPORT cuvsLogLevel_t cuvsGetLogLevel();
+cuvsLogLevel_t cuvsGetLogLevel();
 ```
 
 **Returns**
 
-[`CUVS_EXPORT cuvsLogLevel_t`](/api-reference/c-api-core-c-api#cuvsloglevel-t)
+[`cuvsLogLevel_t`](/api-reference/c-api-core-c-api#cuvsloglevel-t)
 
 <a id="cuvssetloglevel"></a>
 ### cuvsSetLogLevel
@@ -109,7 +109,7 @@ CUVS_EXPORT cuvsLogLevel_t cuvsGetLogLevel();
 Sets the log level
 
 ```c
-CUVS_EXPORT void cuvsSetLogLevel(cuvsLogLevel_t);
+void cuvsSetLogLevel(cuvsLogLevel_t);
 ```
 
 **Parameters**
@@ -120,7 +120,7 @@ CUVS_EXPORT void cuvsSetLogLevel(cuvsLogLevel_t);
 
 **Returns**
 
-`CUVS_EXPORT void`
+`void`
 
 ## cuVS Resources Handle
 
@@ -139,7 +139,7 @@ typedef uintptr_t cuvsResources_t;
 Create an Initialized opaque C handle for C++ type `raft::resources`
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsResourcesCreate(cuvsResources_t* res);
+cuvsError_t cuvsResourcesCreate(cuvsResources_t* res);
 ```
 
 **Parameters**
@@ -150,7 +150,7 @@ CUVS_EXPORT cuvsError_t cuvsResourcesCreate(cuvsResources_t* res);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsresourcesdestroy"></a>
 ### cuvsResourcesDestroy
@@ -158,7 +158,7 @@ CUVS_EXPORT cuvsError_t cuvsResourcesCreate(cuvsResources_t* res);
 Destroy and de-allocate opaque C handle for C++ type `raft::resources`
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsResourcesDestroy(cuvsResources_t res);
+cuvsError_t cuvsResourcesDestroy(cuvsResources_t res);
 ```
 
 **Parameters**
@@ -169,7 +169,7 @@ CUVS_EXPORT cuvsError_t cuvsResourcesDestroy(cuvsResources_t res);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsstreamset"></a>
 ### cuvsStreamSet
@@ -177,7 +177,7 @@ CUVS_EXPORT cuvsError_t cuvsResourcesDestroy(cuvsResources_t res);
 Set cudaStream_t on cuvsResources_t to queue CUDA kernels on APIs that accept a cuvsResources_t handle
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsStreamSet(cuvsResources_t res, cudaStream_t stream);
+cuvsError_t cuvsStreamSet(cuvsResources_t res, cudaStream_t stream);
 ```
 
 **Parameters**
@@ -189,7 +189,7 @@ CUVS_EXPORT cuvsError_t cuvsStreamSet(cuvsResources_t res, cudaStream_t stream);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsstreamget"></a>
 ### cuvsStreamGet
@@ -197,7 +197,7 @@ CUVS_EXPORT cuvsError_t cuvsStreamSet(cuvsResources_t res, cudaStream_t stream);
 Get the cudaStream_t from a cuvsResources_t
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsStreamGet(cuvsResources_t res, cudaStream_t* stream);
+cuvsError_t cuvsStreamGet(cuvsResources_t res, cudaStream_t* stream);
 ```
 
 **Parameters**
@@ -209,7 +209,7 @@ CUVS_EXPORT cuvsError_t cuvsStreamGet(cuvsResources_t res, cudaStream_t* stream)
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsstreamsync"></a>
 ### cuvsStreamSync
@@ -217,7 +217,7 @@ CUVS_EXPORT cuvsError_t cuvsStreamGet(cuvsResources_t res, cudaStream_t* stream)
 Syncs the current CUDA stream on the resources object
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsStreamSync(cuvsResources_t res);
+cuvsError_t cuvsStreamSync(cuvsResources_t res);
 ```
 
 **Parameters**
@@ -228,7 +228,7 @@ CUVS_EXPORT cuvsError_t cuvsStreamSync(cuvsResources_t res);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsdeviceidget"></a>
 ### cuvsDeviceIdGet
@@ -236,7 +236,7 @@ CUVS_EXPORT cuvsError_t cuvsStreamSync(cuvsResources_t res);
 Get the id of the device associated with this cuvsResources_t
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsDeviceIdGet(cuvsResources_t res, int* device_id);
+cuvsError_t cuvsDeviceIdGet(cuvsResources_t res, int* device_id);
 ```
 
 **Parameters**
@@ -248,7 +248,7 @@ CUVS_EXPORT cuvsError_t cuvsDeviceIdGet(cuvsResources_t res, int* device_id);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsmultigpuresourcescreate"></a>
 ### cuvsMultiGpuResourcesCreate
@@ -256,7 +256,7 @@ CUVS_EXPORT cuvsError_t cuvsDeviceIdGet(cuvsResources_t res, int* device_id);
 Create an Initialized opaque C handle for C++ type `raft::device_resources_snmg` for multi-GPU operations
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuResourcesCreate(cuvsResources_t* res);
+cuvsError_t cuvsMultiGpuResourcesCreate(cuvsResources_t* res);
 ```
 
 **Parameters**
@@ -267,7 +267,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuResourcesCreate(cuvsResources_t* res);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsmultigpuresourcescreatewithdeviceids"></a>
 ### cuvsMultiGpuResourcesCreateWithDeviceIds
@@ -275,7 +275,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuResourcesCreate(cuvsResources_t* res);
 Create an Initialized opaque C handle for C++ type `raft::device_resources_snmg` for multi-GPU operations with specific device IDs
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuResourcesCreateWithDeviceIds(cuvsResources_t* res,
+cuvsError_t cuvsMultiGpuResourcesCreateWithDeviceIds(cuvsResources_t* res,
 DLManagedTensor* device_ids);
 ```
 
@@ -288,7 +288,7 @@ DLManagedTensor* device_ids);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsmultigpuresourcesdestroy"></a>
 ### cuvsMultiGpuResourcesDestroy
@@ -296,7 +296,7 @@ DLManagedTensor* device_ids);
 Destroy and de-allocate opaque C handle for C++ type `raft::device_resources_snmg`
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuResourcesDestroy(cuvsResources_t res);
+cuvsError_t cuvsMultiGpuResourcesDestroy(cuvsResources_t res);
 ```
 
 **Parameters**
@@ -307,7 +307,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuResourcesDestroy(cuvsResources_t res);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsmultigpuresourcessetmemorypool"></a>
 ### cuvsMultiGpuResourcesSetMemoryPool
@@ -315,7 +315,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuResourcesDestroy(cuvsResources_t res);
 Set a memory pool on all devices managed by the multi-GPU resources
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuResourcesSetMemoryPool(cuvsResources_t res, int percent_of_free_memory);
+cuvsError_t cuvsMultiGpuResourcesSetMemoryPool(cuvsResources_t res, int percent_of_free_memory);
 ```
 
 **Parameters**
@@ -327,7 +327,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuResourcesSetMemoryPool(cuvsResources_t res,
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## cuVS Memory Allocation
 
@@ -337,7 +337,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuResourcesSetMemoryPool(cuvsResources_t res,
 Allocates device memory using RMM
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsRMMAlloc(cuvsResources_t res, void** ptr, size_t bytes);
+cuvsError_t cuvsRMMAlloc(cuvsResources_t res, void** ptr, size_t bytes);
 ```
 
 **Parameters**
@@ -350,7 +350,7 @@ CUVS_EXPORT cuvsError_t cuvsRMMAlloc(cuvsResources_t res, void** ptr, size_t byt
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsrmmfree"></a>
 ### cuvsRMMFree
@@ -358,7 +358,7 @@ CUVS_EXPORT cuvsError_t cuvsRMMAlloc(cuvsResources_t res, void** ptr, size_t byt
 Deallocates device memory using RMM
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsRMMFree(cuvsResources_t res, void* ptr, size_t bytes);
+cuvsError_t cuvsRMMFree(cuvsResources_t res, void* ptr, size_t bytes);
 ```
 
 **Parameters**
@@ -371,7 +371,7 @@ CUVS_EXPORT cuvsError_t cuvsRMMFree(cuvsResources_t res, void* ptr, size_t bytes
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsrmmpoolmemoryresourceenable"></a>
 ### cuvsRMMPoolMemoryResourceEnable
@@ -379,7 +379,7 @@ CUVS_EXPORT cuvsError_t cuvsRMMFree(cuvsResources_t res, void* ptr, size_t bytes
 Switches the working memory resource to use the RMM pool memory resource, which will bypass unnecessary synchronizations by allocating a chunk of device memory up front and carving that up for temporary memory allocations within algorithms. Be aware that this function will change the memory resource for the whole process and the new memory resource will be used until explicitly changed.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsRMMPoolMemoryResourceEnable(int initial_pool_size_percent,
+cuvsError_t cuvsRMMPoolMemoryResourceEnable(int initial_pool_size_percent,
 int max_pool_size_percent,
 bool managed);
 ```
@@ -394,7 +394,7 @@ bool managed);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsrmmmemoryresourcereset"></a>
 ### cuvsRMMMemoryResourceReset
@@ -402,12 +402,12 @@ bool managed);
 Resets the memory resource to use the default memory resource (cuda_memory_resource)
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsRMMMemoryResourceReset();
+cuvsError_t cuvsRMMMemoryResourceReset();
 ```
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsrmmhostalloc"></a>
 ### cuvsRMMHostAlloc
@@ -415,7 +415,7 @@ CUVS_EXPORT cuvsError_t cuvsRMMMemoryResourceReset();
 Allocates pinned memory on the host using RMM
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsRMMHostAlloc(void** ptr, size_t bytes);
+cuvsError_t cuvsRMMHostAlloc(void** ptr, size_t bytes);
 ```
 
 **Parameters**
@@ -427,7 +427,7 @@ CUVS_EXPORT cuvsError_t cuvsRMMHostAlloc(void** ptr, size_t bytes);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsrmmhostfree"></a>
 ### cuvsRMMHostFree
@@ -435,7 +435,7 @@ CUVS_EXPORT cuvsError_t cuvsRMMHostAlloc(void** ptr, size_t bytes);
 Deallocates pinned memory on the host using RMM
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsRMMHostFree(void* ptr, size_t bytes);
+cuvsError_t cuvsRMMHostFree(void* ptr, size_t bytes);
 ```
 
 **Parameters**
@@ -447,7 +447,7 @@ CUVS_EXPORT cuvsError_t cuvsRMMHostFree(void* ptr, size_t bytes);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsversionget"></a>
 ### cuvsVersionGet
@@ -455,7 +455,7 @@ CUVS_EXPORT cuvsError_t cuvsRMMHostFree(void* ptr, size_t bytes);
 Get the version of the cuVS library
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsVersionGet(uint16_t* major, uint16_t* minor, uint16_t* patch);
+cuvsError_t cuvsVersionGet(uint16_t* major, uint16_t* minor, uint16_t* patch);
 ```
 
 **Parameters**
@@ -468,7 +468,7 @@ CUVS_EXPORT cuvsError_t cuvsVersionGet(uint16_t* major, uint16_t* minor, uint16_
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsmatrixcopy"></a>
 ### cuvsMatrixCopy
@@ -476,7 +476,7 @@ CUVS_EXPORT cuvsError_t cuvsVersionGet(uint16_t* major, uint16_t* minor, uint16_
 Copy a matrix
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMatrixCopy(cuvsResources_t res, DLManagedTensor* src, DLManagedTensor* dst);
+cuvsError_t cuvsMatrixCopy(cuvsResources_t res, DLManagedTensor* src, DLManagedTensor* dst);
 ```
 
 This function copies a matrix from dst to src. This lets you copy a matrix from device memory to host memory (or vice versa), while accounting for differences in strides.
@@ -493,7 +493,7 @@ Both src and dst must have the same shape and dtype, but can have different stri
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsmatrixslicerows"></a>
 ### cuvsMatrixSliceRows
@@ -501,7 +501,7 @@ Both src and dst must have the same shape and dtype, but can have different stri
 Slices rows from a matrix
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMatrixSliceRows(
+cuvsError_t cuvsMatrixSliceRows(
 cuvsResources_t res, DLManagedTensor* src, int64_t start, int64_t end, DLManagedTensor* dst);
 ```
 
@@ -517,4 +517,4 @@ cuvsResources_t res, DLManagedTensor* src, int64_t start, int64_t end, DLManaged
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
diff --git a/fern/pages/c_api/c-api-distance-pairwise-distance.md b/fern/pages/c_api/c-api-distance-pairwise-distance.md
index daeed917f8..98bd15ba96 100644
--- a/fern/pages/c_api/c-api-distance-pairwise-distance.md
+++ b/fern/pages/c_api/c-api-distance-pairwise-distance.md
@@ -14,7 +14,7 @@ _Source header: `cuvs/distance/pairwise_distance.h`_
 Compute pairwise distances for two matrices
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsPairwiseDistance(cuvsResources_t res,
+cuvsError_t cuvsPairwiseDistance(cuvsResources_t res,
 DLManagedTensor* x,
 DLManagedTensor* y,
 DLManagedTensor* dist,
@@ -37,4 +37,4 @@ Usage example:
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
diff --git a/fern/pages/c_api/c-api-neighbors-all-neighbors.md b/fern/pages/c_api/c-api-neighbors-all-neighbors.md
index 67f23031b6..9c15676edf 100644
--- a/fern/pages/c_api/c-api-neighbors-all-neighbors.md
+++ b/fern/pages/c_api/c-api-neighbors-all-neighbors.md
@@ -62,7 +62,7 @@ struct cuvsAllNeighborsIndexParams {
 Create a default all-neighbors index parameters struct.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsAllNeighborsIndexParamsCreate(cuvsAllNeighborsIndexParams_t* index_params);
+cuvsError_t cuvsAllNeighborsIndexParamsCreate(cuvsAllNeighborsIndexParams_t* index_params);
 ```
 
 **Parameters**
@@ -73,7 +73,7 @@ CUVS_EXPORT cuvsError_t cuvsAllNeighborsIndexParamsCreate(cuvsAllNeighborsIndexP
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsallneighborsindexparamsdestroy"></a>
 ### cuvsAllNeighborsIndexParamsDestroy
@@ -81,7 +81,7 @@ CUVS_EXPORT cuvsError_t cuvsAllNeighborsIndexParamsCreate(cuvsAllNeighborsIndexP
 Destroy an all-neighbors index parameters struct.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsAllNeighborsIndexParamsDestroy(cuvsAllNeighborsIndexParams_t index_params);
+cuvsError_t cuvsAllNeighborsIndexParamsDestroy(cuvsAllNeighborsIndexParams_t index_params);
 ```
 
 **Parameters**
@@ -92,7 +92,7 @@ CUVS_EXPORT cuvsError_t cuvsAllNeighborsIndexParamsDestroy(cuvsAllNeighborsIndex
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## All-neighbors C-API build
 
@@ -102,7 +102,7 @@ CUVS_EXPORT cuvsError_t cuvsAllNeighborsIndexParamsDestroy(cuvsAllNeighborsIndex
 Build an all-neighbors k-NN graph automatically detecting host vs device dataset.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsAllNeighborsBuild(cuvsResources_t res,
+cuvsError_t cuvsAllNeighborsBuild(cuvsResources_t res,
 cuvsAllNeighborsIndexParams_t params,
 DLManagedTensor* dataset,
 DLManagedTensor* indices,
@@ -127,4 +127,4 @@ The function automatically detects whether the dataset is host-resident or devic
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
diff --git a/fern/pages/c_api/c-api-neighbors-brute-force.md b/fern/pages/c_api/c-api-neighbors-brute-force.md
index ba43e0f355..0a7fb8a2d8 100644
--- a/fern/pages/c_api/c-api-neighbors-brute-force.md
+++ b/fern/pages/c_api/c-api-neighbors-brute-force.md
@@ -33,7 +33,7 @@ typedef struct {
 Allocate BRUTEFORCE index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsBruteForceIndexCreate(cuvsBruteForceIndex_t* index);
+cuvsError_t cuvsBruteForceIndexCreate(cuvsBruteForceIndex_t* index);
 ```
 
 **Parameters**
@@ -44,7 +44,7 @@ CUVS_EXPORT cuvsError_t cuvsBruteForceIndexCreate(cuvsBruteForceIndex_t* index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsbruteforceindexdestroy"></a>
 ### cuvsBruteForceIndexDestroy
@@ -52,7 +52,7 @@ CUVS_EXPORT cuvsError_t cuvsBruteForceIndexCreate(cuvsBruteForceIndex_t* index);
 De-allocate BRUTEFORCE index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsBruteForceIndexDestroy(cuvsBruteForceIndex_t index);
+cuvsError_t cuvsBruteForceIndexDestroy(cuvsBruteForceIndex_t index);
 ```
 
 **Parameters**
@@ -63,17 +63,20 @@ CUVS_EXPORT cuvsError_t cuvsBruteForceIndexDestroy(cuvsBruteForceIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Bruteforce index build
 
 <a id="cuvsbruteforcebuild"></a>
 ### cuvsBruteForceBuild
 
-Build a BRUTEFORCE index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`, or `kDLCPU`. Also, acceptable underlying types are: 1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` 2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16`
+Build a BRUTEFORCE index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`, or `kDLCPU`. Also, acceptable underlying types are:
+
+1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
+2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16`
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsBruteForceBuild(cuvsResources_t res,
+cuvsError_t cuvsBruteForceBuild(cuvsResources_t res,
 DLManagedTensor* dataset,
 cuvsDistanceType metric,
 float metric_arg,
@@ -92,17 +95,21 @@ cuvsBruteForceIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Bruteforce index search
 
 <a id="cuvsbruteforcesearch"></a>
 ### cuvsBruteForceSearch
 
-Search a BRUTEFORCE index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`. It is also important to note that the BRUTEFORCE index must have been built with the same type of `queries`, such that `index.dtype.code == queries.dl_tensor.dtype.code` Types for input are: 1. `queries`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` or `kDLDataType.bits = 16` 2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 32` 3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
+Search a BRUTEFORCE index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`. It is also important to note that the BRUTEFORCE index must have been built with the same type of `queries`, such that `index.dtype.code == queries.dl_tensor.dtype.code`. Types for input are:
+
+1. `queries`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` or `kDLDataType.bits = 16`
+2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 32`
+3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsBruteForceSearch(cuvsResources_t res,
+cuvsError_t cuvsBruteForceSearch(cuvsResources_t res,
 cuvsBruteForceIndex_t index,
 DLManagedTensor* queries,
 DLManagedTensor* neighbors,
@@ -123,7 +130,7 @@ cuvsFilter prefilter);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## BRUTEFORCE C-API serialize functions
 
@@ -133,7 +140,7 @@ cuvsFilter prefilter);
 Save the index to file.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsBruteForceSerialize(cuvsResources_t res,
+cuvsError_t cuvsBruteForceSerialize(cuvsResources_t res,
 const char* filename,
 cuvsBruteForceIndex_t index);
 ```
@@ -150,7 +157,7 @@ The serialization format can be subject to changes, therefore loading an index s
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsbruteforcedeserialize"></a>
 ### cuvsBruteForceDeserialize
@@ -158,7 +165,7 @@ The serialization format can be subject to changes, therefore loading an index s
 Load index from file.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsBruteForceDeserialize(cuvsResources_t res,
+cuvsError_t cuvsBruteForceDeserialize(cuvsResources_t res,
 const char* filename,
 cuvsBruteForceIndex_t index);
 ```
@@ -175,4 +182,4 @@ The serialization format can be subject to changes, therefore loading an index s
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
diff --git a/fern/pages/c_api/c-api-neighbors-cagra.md b/fern/pages/c_api/c-api-neighbors-cagra.md
index d268713f06..2ebddfbebd 100644
--- a/fern/pages/c_api/c-api-neighbors-cagra.md
+++ b/fern/pages/c_api/c-api-neighbors-cagra.md
@@ -133,7 +133,7 @@ struct cuvsCagraIndexParams {
 Allocate CAGRA Index params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsCagraIndexParamsCreate(cuvsCagraIndexParams_t* params);
+cuvsError_t cuvsCagraIndexParamsCreate(cuvsCagraIndexParams_t* params);
 ```
 
 **Parameters**
@@ -144,7 +144,7 @@ CUVS_EXPORT cuvsError_t cuvsCagraIndexParamsCreate(cuvsCagraIndexParams_t* param
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvscagraindexparamsdestroy"></a>
 ### cuvsCagraIndexParamsDestroy
@@ -152,7 +152,7 @@ CUVS_EXPORT cuvsError_t cuvsCagraIndexParamsCreate(cuvsCagraIndexParams_t* param
 De-allocate CAGRA Index params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsCagraIndexParamsDestroy(cuvsCagraIndexParams_t params);
+cuvsError_t cuvsCagraIndexParamsDestroy(cuvsCagraIndexParams_t params);
 ```
 
 **Parameters**
@@ -163,7 +163,7 @@ CUVS_EXPORT cuvsError_t cuvsCagraIndexParamsDestroy(cuvsCagraIndexParams_t param
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvscagracompressionparamscreate"></a>
 ### cuvsCagraCompressionParamsCreate
@@ -171,7 +171,7 @@ CUVS_EXPORT cuvsError_t cuvsCagraIndexParamsDestroy(cuvsCagraIndexParams_t param
 Allocate CAGRA Compression params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsCagraCompressionParamsCreate(cuvsCagraCompressionParams_t* params);
+cuvsError_t cuvsCagraCompressionParamsCreate(cuvsCagraCompressionParams_t* params);
 ```
 
 **Parameters**
@@ -182,7 +182,7 @@ CUVS_EXPORT cuvsError_t cuvsCagraCompressionParamsCreate(cuvsCagraCompressionPar
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvscagracompressionparamsdestroy"></a>
 ### cuvsCagraCompressionParamsDestroy
@@ -190,7 +190,7 @@ CUVS_EXPORT cuvsError_t cuvsCagraCompressionParamsCreate(cuvsCagraCompressionPar
 De-allocate CAGRA Compression params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsCagraCompressionParamsDestroy(cuvsCagraCompressionParams_t params);
+cuvsError_t cuvsCagraCompressionParamsDestroy(cuvsCagraCompressionParams_t params);
 ```
 
 **Parameters**
@@ -201,7 +201,7 @@ CUVS_EXPORT cuvsError_t cuvsCagraCompressionParamsDestroy(cuvsCagraCompressionPa
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsaceparamscreate"></a>
 ### cuvsAceParamsCreate
@@ -209,7 +209,7 @@ CUVS_EXPORT cuvsError_t cuvsCagraCompressionParamsDestroy(cuvsCagraCompressionPa
 Allocate ACE params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsAceParamsCreate(cuvsAceParams_t* params);
+cuvsError_t cuvsAceParamsCreate(cuvsAceParams_t* params);
 ```
 
 **Parameters**
@@ -220,7 +220,7 @@ CUVS_EXPORT cuvsError_t cuvsAceParamsCreate(cuvsAceParams_t* params);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsaceparamsdestroy"></a>
 ### cuvsAceParamsDestroy
@@ -228,7 +228,7 @@ CUVS_EXPORT cuvsError_t cuvsAceParamsCreate(cuvsAceParams_t* params);
 De-allocate ACE params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsAceParamsDestroy(cuvsAceParams_t params);
+cuvsError_t cuvsAceParamsDestroy(cuvsAceParams_t params);
 ```
 
 **Parameters**
@@ -239,7 +239,7 @@ CUVS_EXPORT cuvsError_t cuvsAceParamsDestroy(cuvsAceParams_t params);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvscagraindexparamsfromhnswparams"></a>
 ### cuvsCagraIndexParamsFromHnswParams
@@ -247,7 +247,7 @@ CUVS_EXPORT cuvsError_t cuvsAceParamsDestroy(cuvsAceParams_t params);
 Create CAGRA index parameters similar to an HNSW index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsCagraIndexParamsFromHnswParams(cuvsCagraIndexParams_t params,
+cuvsError_t cuvsCagraIndexParamsFromHnswParams(cuvsCagraIndexParams_t params,
 int64_t n_rows,
 int64_t dim,
 int M,
@@ -272,7 +272,7 @@ This factory function creates CAGRA parameters that yield a graph compatible wit
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## C API for CUDA ANN Graph-based nearest neighbor search
 
@@ -299,7 +299,7 @@ struct cuvsCagraExtendParams {
 Allocate CAGRA Extend params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsCagraExtendParamsCreate(cuvsCagraExtendParams_t* params);
+cuvsError_t cuvsCagraExtendParamsCreate(cuvsCagraExtendParams_t* params);
 ```
 
 **Parameters**
@@ -310,7 +310,7 @@ CUVS_EXPORT cuvsError_t cuvsCagraExtendParamsCreate(cuvsCagraExtendParams_t* par
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvscagraextendparamsdestroy"></a>
 ### cuvsCagraExtendParamsDestroy
@@ -318,7 +318,7 @@ CUVS_EXPORT cuvsError_t cuvsCagraExtendParamsCreate(cuvsCagraExtendParams_t* par
 De-allocate CAGRA Extend params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsCagraExtendParamsDestroy(cuvsCagraExtendParams_t params);
+cuvsError_t cuvsCagraExtendParamsDestroy(cuvsCagraExtendParams_t params);
 ```
 
 **Parameters**
@@ -329,15 +329,20 @@ CUVS_EXPORT cuvsError_t cuvsCagraExtendParamsDestroy(cuvsCagraExtendParams_t par
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvscagraextend"></a>
 ### cuvsCagraExtend
 
-Extend a CAGRA index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`, or `kDLCPU`. Also, acceptable underlying types are: 1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` 2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16` 3. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8` 4. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
+Extend a CAGRA index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`, or `kDLCPU`. Also, acceptable underlying types are:
+
+1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
+2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16`
+3. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
+4. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsCagraExtend(cuvsResources_t res,
+cuvsError_t cuvsCagraExtend(cuvsResources_t res,
 cuvsCagraExtendParams_t params,
 DLManagedTensor* additional_dataset,
 cuvsCagraIndex_t index);
@@ -354,7 +359,7 @@ cuvsCagraIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## C API for CUDA ANN Graph-based nearest neighbor search
 
@@ -447,7 +452,7 @@ struct cuvsCagraSearchParams {
 | `rand_xor_mask` | `uint64_t` | Bit mask used for initial random seed node selection. |
 | `persistent` | `bool` | Whether to use the persistent version of the kernel (only SINGLE_CTA is supported a.t.m.) |
 | `persistent_lifetime` | `float` | Persistent kernel: time in seconds before the kernel stops if no requests received. |
-| `persistent_device_usage` | `float` | Set the fraction of maximum grid size used by persistent kernel. Value 1.0 means the kernel grid size is maximum possible for the selected device. The value must be greater than 0.0 and not greater than 1.0.<br /><br />One may need to run other kernels alongside this persistent kernel. This parameter can be used to reduce the grid size of the persistent kernel to leave a few SMs idle. Note: running any other work on GPU alongside with the persistent kernel makes the setup fragile.<br />- Running another kernel in another thread usually works, but no progress guaranteed<br />- Any CUDA allocations block the context (this issue may be obscured by using pools)<br />- Memory copies to not-pinned host memory may block the context<br /><br />Even when we know there are no other kernels working at the same time, setting kDeviceUsage to 1.0 surprisingly sometimes hurts performance. Proceed with care. If you suspect this is an issue, you can reduce this number to ~0.9 without a significant impact on the throughput. |
+| `persistent_device_usage` | `float` | Set the fraction of maximum grid size used by persistent kernel. Value 1.0 means the kernel grid size is maximum possible for the selected device. The value must be greater than 0.0 and not greater than 1.0.<br /><br />One may need to run other kernels alongside this persistent kernel. This parameter can be used to reduce the grid size of the persistent kernel to leave a few SMs idle.<br />Note: running any other work on GPU alongside the persistent kernel makes the setup fragile.<br />- Running another kernel in another thread usually works, but no progress is guaranteed<br />- Any CUDA allocations block the context (this issue may be obscured by using pools)<br />- Memory copies to not-pinned host memory may block the context<br /><br />Even when we know there are no other kernels working at the same time, setting kDeviceUsage to 1.0 surprisingly sometimes hurts performance. Proceed with care. If you suspect this is an issue, you can reduce this number to ~0.9 without a significant impact on the throughput. |
 
 <a id="cuvscagrasearchparamscreate"></a>
 ### cuvsCagraSearchParamsCreate
@@ -455,7 +460,7 @@ struct cuvsCagraSearchParams {
 Allocate CAGRA search params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsCagraSearchParamsCreate(cuvsCagraSearchParams_t* params);
+cuvsError_t cuvsCagraSearchParamsCreate(cuvsCagraSearchParams_t* params);
 ```
 
 **Parameters**
@@ -466,7 +471,7 @@ CUVS_EXPORT cuvsError_t cuvsCagraSearchParamsCreate(cuvsCagraSearchParams_t* par
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvscagrasearchparamsdestroy"></a>
 ### cuvsCagraSearchParamsDestroy
@@ -474,7 +479,7 @@ CUVS_EXPORT cuvsError_t cuvsCagraSearchParamsCreate(cuvsCagraSearchParams_t* par
 De-allocate CAGRA search params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsCagraSearchParamsDestroy(cuvsCagraSearchParams_t params);
+cuvsError_t cuvsCagraSearchParamsDestroy(cuvsCagraSearchParams_t params);
 ```
 
 **Parameters**
@@ -485,7 +490,7 @@ CUVS_EXPORT cuvsError_t cuvsCagraSearchParamsDestroy(cuvsCagraSearchParams_t par
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## C API for CUDA ANN Graph-based nearest neighbor search
 
@@ -514,7 +519,7 @@ typedef struct {
 Allocate CAGRA index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsCagraIndexCreate(cuvsCagraIndex_t* index);
+cuvsError_t cuvsCagraIndexCreate(cuvsCagraIndex_t* index);
 ```
 
 **Parameters**
@@ -525,7 +530,7 @@ CUVS_EXPORT cuvsError_t cuvsCagraIndexCreate(cuvsCagraIndex_t* index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvscagraindexdestroy"></a>
 ### cuvsCagraIndexDestroy
@@ -533,7 +538,7 @@ CUVS_EXPORT cuvsError_t cuvsCagraIndexCreate(cuvsCagraIndex_t* index);
 De-allocate CAGRA index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsCagraIndexDestroy(cuvsCagraIndex_t index);
+cuvsError_t cuvsCagraIndexDestroy(cuvsCagraIndex_t index);
 ```
 
 **Parameters**
@@ -544,7 +549,7 @@ CUVS_EXPORT cuvsError_t cuvsCagraIndexDestroy(cuvsCagraIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvscagraindexgetdims"></a>
 ### cuvsCagraIndexGetDims
@@ -552,7 +557,7 @@ CUVS_EXPORT cuvsError_t cuvsCagraIndexDestroy(cuvsCagraIndex_t index);
 Get dimension of the CAGRA index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsCagraIndexGetDims(cuvsCagraIndex_t index, int64_t* dim);
+cuvsError_t cuvsCagraIndexGetDims(cuvsCagraIndex_t index, int64_t* dim);
 ```
 
 **Parameters**
@@ -564,7 +569,7 @@ CUVS_EXPORT cuvsError_t cuvsCagraIndexGetDims(cuvsCagraIndex_t index, int64_t* d
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvscagraindexgetsize"></a>
 ### cuvsCagraIndexGetSize
@@ -572,7 +577,7 @@ CUVS_EXPORT cuvsError_t cuvsCagraIndexGetDims(cuvsCagraIndex_t index, int64_t* d
 Get size of the CAGRA index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsCagraIndexGetSize(cuvsCagraIndex_t index, int64_t* size);
+cuvsError_t cuvsCagraIndexGetSize(cuvsCagraIndex_t index, int64_t* size);
 ```
 
 **Parameters**
@@ -584,7 +589,7 @@ CUVS_EXPORT cuvsError_t cuvsCagraIndexGetSize(cuvsCagraIndex_t index, int64_t* s
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvscagraindexgetgraphdegree"></a>
 ### cuvsCagraIndexGetGraphDegree
@@ -592,7 +597,7 @@ CUVS_EXPORT cuvsError_t cuvsCagraIndexGetSize(cuvsCagraIndex_t index, int64_t* s
 Get graph degree of the CAGRA index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsCagraIndexGetGraphDegree(cuvsCagraIndex_t index, int64_t* graph_degree);
+cuvsError_t cuvsCagraIndexGetGraphDegree(cuvsCagraIndex_t index, int64_t* graph_degree);
 ```
 
 **Parameters**
@@ -604,7 +609,7 @@ CUVS_EXPORT cuvsError_t cuvsCagraIndexGetGraphDegree(cuvsCagraIndex_t index, int
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvscagraindexgetdataset"></a>
 ### cuvsCagraIndexGetDataset
@@ -612,7 +617,7 @@ CUVS_EXPORT cuvsError_t cuvsCagraIndexGetGraphDegree(cuvsCagraIndex_t index, int
 Returns a view of the CAGRA dataset
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsCagraIndexGetDataset(cuvsCagraIndex_t index, DLManagedTensor* dataset);
+cuvsError_t cuvsCagraIndexGetDataset(cuvsCagraIndex_t index, DLManagedTensor* dataset);
 ```
 
 This function returns a non-owning view of the CAGRA dataset. The output will be referencing device memory that is directly used in CAGRA, without copying the dataset at all. This means that the output is only valid as long as the CAGRA index is alive, and once cuvsCagraIndexDestroy is called on the cagra index - the returned dataset view will be invalid.
@@ -628,7 +633,7 @@ Note that the DLManagedTensor dataset returned will have an associated 'deleter'
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvscagraindexgetgraph"></a>
 ### cuvsCagraIndexGetGraph
@@ -636,7 +641,7 @@ Note that the DLManagedTensor dataset returned will have an associated 'deleter'
 Returns a view of the CAGRA graph
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsCagraIndexGetGraph(cuvsCagraIndex_t index, DLManagedTensor* graph);
+cuvsError_t cuvsCagraIndexGetGraph(cuvsCagraIndex_t index, DLManagedTensor* graph);
 ```
 
 This function returns a non-owning view of the CAGRA graph. The output will be referencing device memory that is directly used in CAGRA, without copying the graph at all. This means that the output is only valid as long as the CAGRA index is alive, and once cuvsCagraIndexDestroy is called on the cagra index - the returned graph view will be invalid.
@@ -652,17 +657,22 @@ Note that the DLManagedTensor graph returned will have an associated 'deleter' f
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## C API for CUDA ANN Graph-based nearest neighbor search
 
 <a id="cuvscagrabuild"></a>
 ### cuvsCagraBuild
 
-Build a CAGRA index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`, or `kDLCPU`. Also, acceptable underlying types are: 1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` 2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16` 3. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8` 4. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
+Build a CAGRA index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`, or `kDLCPU`. Also, acceptable underlying types are:
+
+1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
+2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16`
+3. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
+4. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsCagraBuild(cuvsResources_t res,
+cuvsError_t cuvsCagraBuild(cuvsResources_t res,
 cuvsCagraIndexParams_t params,
 DLManagedTensor* dataset,
 cuvsCagraIndex_t index);
@@ -679,17 +689,21 @@ cuvsCagraIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## C API for CUDA ANN Graph-based nearest neighbor search
 
 <a id="cuvscagrasearch"></a>
 ### cuvsCagraSearch
 
-Search a CAGRA index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`. It is also important to note that the CAGRA Index must have been built with the same type of `queries`, such that `index.dtype.code == queries.dl_tensor.dtype.code` Types for input are: 1. `queries`: a. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` b. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16` c. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8` d. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8` 2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 32` or `kDLDataType.code == kDLInt`  and `kDLDataType.bits = 64` 3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
+Search a CAGRA index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`. It is also important to note that the CAGRA Index must have been built with the same type of `queries`, such that `index.dtype.code == queries.dl_tensor.dtype.code`. Types for input are:
+
+1. `queries`: one of (a) `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`; (b) `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16`; (c) `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`; (d) `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
+2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 32` or `kDLDataType.code == kDLInt` and `kDLDataType.bits = 64`
+3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsCagraSearch(cuvsResources_t res,
+cuvsError_t cuvsCagraSearch(cuvsResources_t res,
 cuvsCagraSearchParams_t params,
 cuvsCagraIndex_t index,
 DLManagedTensor* queries,
@@ -712,7 +726,7 @@ cuvsFilter filter);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## CAGRA C-API serialize functions
 
@@ -722,7 +736,7 @@ cuvsFilter filter);
 Save the index to file.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsCagraSerialize(cuvsResources_t res,
+cuvsError_t cuvsCagraSerialize(cuvsResources_t res,
 const char* filename,
 cuvsCagraIndex_t index,
 bool include_dataset);
@@ -741,7 +755,7 @@ Experimental, both the API and the serialization format are subject to change.
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvscagraserializetohnswlib"></a>
 ### cuvsCagraSerializeToHnswlib
@@ -749,7 +763,7 @@ Experimental, both the API and the serialization format are subject to change.
 Save the CAGRA index to file in hnswlib format.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsCagraSerializeToHnswlib(cuvsResources_t res,
+cuvsError_t cuvsCagraSerializeToHnswlib(cuvsResources_t res,
 const char* filename,
 cuvsCagraIndex_t index);
 ```
@@ -768,7 +782,7 @@ Experimental, both the API and the serialization format are subject to change.
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvscagradeserialize"></a>
 ### cuvsCagraDeserialize
@@ -776,7 +790,7 @@ Experimental, both the API and the serialization format are subject to change.
 Load index from file.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsCagraDeserialize(cuvsResources_t res, const char* filename, cuvsCagraIndex_t index);
+cuvsError_t cuvsCagraDeserialize(cuvsResources_t res, const char* filename, cuvsCagraIndex_t index);
 ```
 
 Experimental, both the API and the serialization format are subject to change.
@@ -791,7 +805,7 @@ Experimental, both the API and the serialization format are subject to change.
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvscagraindexfromargs"></a>
 ### cuvsCagraIndexFromArgs
@@ -799,7 +813,7 @@ Experimental, both the API and the serialization format are subject to change.
 Load index from a dataset and graph
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsCagraIndexFromArgs(cuvsResources_t res,
+cuvsError_t cuvsCagraIndexFromArgs(cuvsResources_t res,
 cuvsDistanceType metric,
 DLManagedTensor* graph,
 DLManagedTensor* dataset,
@@ -818,7 +832,7 @@ cuvsCagraIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## CAGRA C-API merge functions
 
@@ -828,7 +842,7 @@ cuvsCagraIndex_t index);
 Merge multiple CAGRA indices into a single CAGRA index.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsCagraMerge(cuvsResources_t res,
+cuvsError_t cuvsCagraMerge(cuvsResources_t res,
 cuvsCagraIndexParams_t params,
 cuvsCagraIndex_t* indices,
 size_t num_indices,
@@ -860,4 +874,4 @@ Example:
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
diff --git a/fern/pages/c_api/c-api-neighbors-hnsw.md b/fern/pages/c_api/c-api-neighbors-hnsw.md
index 9a701fc66b..317c9aa4f0 100644
--- a/fern/pages/c_api/c-api-neighbors-hnsw.md
+++ b/fern/pages/c_api/c-api-neighbors-hnsw.md
@@ -66,7 +66,7 @@ struct cuvsHnswAceParams {
 Allocate HNSW ACE params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsHnswAceParamsCreate(cuvsHnswAceParams_t* params);
+cuvsError_t cuvsHnswAceParamsCreate(cuvsHnswAceParams_t* params);
 ```
 
 **Parameters**
@@ -77,7 +77,7 @@ CUVS_EXPORT cuvsError_t cuvsHnswAceParamsCreate(cuvsHnswAceParams_t* params);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvshnswaceparamsdestroy"></a>
 ### cuvsHnswAceParamsDestroy
@@ -85,7 +85,7 @@ CUVS_EXPORT cuvsError_t cuvsHnswAceParamsCreate(cuvsHnswAceParams_t* params);
 De-allocate HNSW ACE params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsHnswAceParamsDestroy(cuvsHnswAceParams_t params);
+cuvsError_t cuvsHnswAceParamsDestroy(cuvsHnswAceParams_t params);
 ```
 
 **Parameters**
@@ -96,7 +96,7 @@ CUVS_EXPORT cuvsError_t cuvsHnswAceParamsDestroy(cuvsHnswAceParams_t params);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvshnswindexparamscreate"></a>
 ### cuvsHnswIndexParamsCreate
@@ -104,7 +104,7 @@ CUVS_EXPORT cuvsError_t cuvsHnswAceParamsDestroy(cuvsHnswAceParams_t params);
 Allocate HNSW Index params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsHnswIndexParamsCreate(cuvsHnswIndexParams_t* params);
+cuvsError_t cuvsHnswIndexParamsCreate(cuvsHnswIndexParams_t* params);
 ```
 
 **Parameters**
@@ -115,7 +115,7 @@ CUVS_EXPORT cuvsError_t cuvsHnswIndexParamsCreate(cuvsHnswIndexParams_t* params)
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvshnswindexparamsdestroy"></a>
 ### cuvsHnswIndexParamsDestroy
@@ -123,7 +123,7 @@ CUVS_EXPORT cuvsError_t cuvsHnswIndexParamsCreate(cuvsHnswIndexParams_t* params)
 De-allocate HNSW Index params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsHnswIndexParamsDestroy(cuvsHnswIndexParams_t params);
+cuvsError_t cuvsHnswIndexParamsDestroy(cuvsHnswIndexParams_t params);
 ```
 
 **Parameters**
@@ -134,7 +134,7 @@ CUVS_EXPORT cuvsError_t cuvsHnswIndexParamsDestroy(cuvsHnswIndexParams_t params)
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## C API for hnswlib wrapper index
 
@@ -163,7 +163,7 @@ typedef struct {
 Allocate HNSW index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsHnswIndexCreate(cuvsHnswIndex_t* index);
+cuvsError_t cuvsHnswIndexCreate(cuvsHnswIndex_t* index);
 ```
 
 **Parameters**
@@ -174,7 +174,7 @@ CUVS_EXPORT cuvsError_t cuvsHnswIndexCreate(cuvsHnswIndex_t* index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvshnswindexdestroy"></a>
 ### cuvsHnswIndexDestroy
@@ -182,7 +182,7 @@ CUVS_EXPORT cuvsError_t cuvsHnswIndexCreate(cuvsHnswIndex_t* index);
 De-allocate HNSW index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsHnswIndexDestroy(cuvsHnswIndex_t index);
+cuvsError_t cuvsHnswIndexDestroy(cuvsHnswIndex_t index);
 ```
 
 **Parameters**
@@ -193,7 +193,7 @@ CUVS_EXPORT cuvsError_t cuvsHnswIndexDestroy(cuvsHnswIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Parameters for extending HNSW index
 
@@ -220,7 +220,7 @@ struct cuvsHnswExtendParams {
 Allocate HNSW extend params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsHnswExtendParamsCreate(cuvsHnswExtendParams_t* params);
+cuvsError_t cuvsHnswExtendParamsCreate(cuvsHnswExtendParams_t* params);
 ```
 
 **Parameters**
@@ -231,7 +231,7 @@ CUVS_EXPORT cuvsError_t cuvsHnswExtendParamsCreate(cuvsHnswExtendParams_t* param
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvshnswextendparamsdestroy"></a>
 ### cuvsHnswExtendParamsDestroy
@@ -239,7 +239,7 @@ CUVS_EXPORT cuvsError_t cuvsHnswExtendParamsCreate(cuvsHnswExtendParams_t* param
 De-allocate HNSW extend params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsHnswExtendParamsDestroy(cuvsHnswExtendParams_t params);
+cuvsError_t cuvsHnswExtendParamsDestroy(cuvsHnswExtendParams_t params);
 ```
 
 **Parameters**
@@ -250,17 +250,20 @@ CUVS_EXPORT cuvsError_t cuvsHnswExtendParamsDestroy(cuvsHnswExtendParams_t param
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Load CAGRA index as hnswlib index
 
 <a id="cuvshnswfromcagra"></a>
 ### cuvsHnswFromCagra
 
-Convert a CAGRA Index to an HNSW index. NOTE: When hierarchy is: 1. `NONE`: This method uses the filesystem to write the CAGRA index in `/tmp/&lt;random_number&gt;.bin` before reading it as an hnswlib index, then deleting the temporary file. The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS, as the format is not compatible with the original hnswlib. 2. `CPU`: The returned index is mutable and can be extended with additional vectors. The serialized index is also compatible with the original hnswlib library.
+Convert a CAGRA Index to an HNSW index. NOTE: When hierarchy is:
+
+1. `NONE`: This method uses the filesystem to write the CAGRA index in `/tmp/&lt;random_number&gt;.bin` before reading it as an hnswlib index, then deleting the temporary file. The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS, as the format is not compatible with the original hnswlib.
+2. `CPU`: The returned index is mutable and can be extended with additional vectors. The serialized index is also compatible with the original hnswlib library.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsHnswFromCagra(cuvsResources_t res,
+cuvsError_t cuvsHnswFromCagra(cuvsResources_t res,
 cuvsHnswIndexParams_t params,
 cuvsCagraIndex_t cagra_index,
 cuvsHnswIndex_t hnsw_index);
@@ -277,7 +280,7 @@ cuvsHnswIndex_t hnsw_index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Build HNSW index using ACE algorithm
 
@@ -287,7 +290,7 @@ cuvsHnswIndex_t hnsw_index);
 Build an HNSW index using ACE (Augmented Core Extraction) algorithm.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsHnswBuild(cuvsResources_t res,
+cuvsError_t cuvsHnswBuild(cuvsResources_t res,
 cuvsHnswIndexParams_t params,
 DLManagedTensor* dataset,
 cuvsHnswIndex_t index);
@@ -312,7 +315,7 @@ NOTE: This function requires CUDA to be available at runtime.
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Extend HNSW index with additional vectors
 
@@ -322,7 +325,7 @@ NOTE: This function requires CUDA to be available at runtime.
 Add new vectors to an HNSW index NOTE: The HNSW index can only be extended when the hierarchy is `CPU` when converting from a CAGRA index.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsHnswExtend(cuvsResources_t res,
+cuvsError_t cuvsHnswExtend(cuvsResources_t res,
 cuvsHnswExtendParams_t params,
 DLManagedTensor* additional_dataset,
 cuvsHnswIndex_t index);
@@ -339,7 +342,7 @@ cuvsHnswIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## C API for hnswlib wrapper search params
 
@@ -368,7 +371,7 @@ struct cuvsHnswSearchParams {
 Allocate HNSW search params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsHnswSearchParamsCreate(cuvsHnswSearchParams_t* params);
+cuvsError_t cuvsHnswSearchParamsCreate(cuvsHnswSearchParams_t* params);
 ```
 
 **Parameters**
@@ -379,7 +382,7 @@ CUVS_EXPORT cuvsError_t cuvsHnswSearchParamsCreate(cuvsHnswSearchParams_t* param
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvshnswsearchparamsdestroy"></a>
 ### cuvsHnswSearchParamsDestroy
@@ -387,7 +390,7 @@ CUVS_EXPORT cuvsError_t cuvsHnswSearchParamsCreate(cuvsHnswSearchParams_t* param
 De-allocate HNSW search params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsHnswSearchParamsDestroy(cuvsHnswSearchParams_t params);
+cuvsError_t cuvsHnswSearchParamsDestroy(cuvsHnswSearchParams_t params);
 ```
 
 **Parameters**
@@ -398,17 +401,21 @@ CUVS_EXPORT cuvsError_t cuvsHnswSearchParamsDestroy(cuvsHnswSearchParams_t param
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## C API for CUDA ANN Graph-based nearest neighbor search
 
 <a id="cuvshnswsearch"></a>
 ### cuvsHnswSearch
 
-Search a HNSW index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCPU`, `kDLCUDAHost`, or `kDLCUDAManaged`. It is also important to note that the HNSW Index must have been built with the same type of `queries`, such that `index.dtype.code == queries.dl_tensor.dtype.code` Supported types for input are: 1. `queries`: a. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` b. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8` c. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8` 2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 64` 3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` NOTE: When hierarchy is `NONE`, the HNSW index can only be searched by the hnswlib wrapper in cuVS, as the format is not compatible with the original hnswlib.
+Search an HNSW index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCPU`, `kDLCUDAHost`, or `kDLCUDAManaged`. It is also important to note that the HNSW Index must have been built with the same type of `queries`, such that `index.dtype.code == queries.dl_tensor.dtype.code`. Supported types for input are:
+
+1. `queries`: a. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` b. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8` c. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
+2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 64`
+3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`. NOTE: When hierarchy is `NONE`, the HNSW index can only be searched by the hnswlib wrapper in cuVS, as the format is not compatible with the original hnswlib.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsHnswSearch(cuvsResources_t res,
+cuvsError_t cuvsHnswSearch(cuvsResources_t res,
 cuvsHnswSearchParams_t params,
 cuvsHnswIndex_t index,
 DLManagedTensor* queries,
@@ -429,7 +436,7 @@ DLManagedTensor* distances);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## HNSW C-API serialize functions
 
@@ -439,7 +446,7 @@ DLManagedTensor* distances);
 Serialize a CAGRA index to a file as an hnswlib index NOTE: When hierarchy is `NONE`, the saved hnswlib index is immutable and can only be read by the hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib. However, when hierarchy is `CPU`, the saved hnswlib index is compatible with the original hnswlib library.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsHnswSerialize(cuvsResources_t res, const char* filename, cuvsHnswIndex_t index);
+cuvsError_t cuvsHnswSerialize(cuvsResources_t res, const char* filename, cuvsHnswIndex_t index);
 ```
 
 **Parameters**
@@ -452,7 +459,7 @@ CUVS_EXPORT cuvsError_t cuvsHnswSerialize(cuvsResources_t res, const char* filen
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvshnswdeserialize"></a>
 ### cuvsHnswDeserialize
@@ -460,7 +467,7 @@ CUVS_EXPORT cuvsError_t cuvsHnswSerialize(cuvsResources_t res, const char* filen
 Load hnswlib index from file which was serialized from a HNSW index.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsHnswDeserialize(cuvsResources_t res,
+cuvsError_t cuvsHnswDeserialize(cuvsResources_t res,
 cuvsHnswIndexParams_t params,
 const char* filename,
 int dim,
@@ -483,4 +490,4 @@ NOTE: When hierarchy is `NONE`, the loaded hnswlib index is immutable, and only
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
diff --git a/fern/pages/c_api/c-api-neighbors-ivf-flat.md b/fern/pages/c_api/c-api-neighbors-ivf-flat.md
index bf420a7603..bf30583dba 100644
--- a/fern/pages/c_api/c-api-neighbors-ivf-flat.md
+++ b/fern/pages/c_api/c-api-neighbors-ivf-flat.md
@@ -45,7 +45,7 @@ struct cuvsIvfFlatIndexParams {
 Allocate IVF-Flat Index params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfFlatIndexParamsCreate(cuvsIvfFlatIndexParams_t* index_params);
+cuvsError_t cuvsIvfFlatIndexParamsCreate(cuvsIvfFlatIndexParams_t* index_params);
 ```
 
 **Parameters**
@@ -56,7 +56,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfFlatIndexParamsCreate(cuvsIvfFlatIndexParams_t* i
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfflatindexparamsdestroy"></a>
 ### cuvsIvfFlatIndexParamsDestroy
@@ -64,7 +64,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfFlatIndexParamsCreate(cuvsIvfFlatIndexParams_t* i
 De-allocate IVF-Flat Index params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfFlatIndexParamsDestroy(cuvsIvfFlatIndexParams_t index_params);
+cuvsError_t cuvsIvfFlatIndexParamsDestroy(cuvsIvfFlatIndexParams_t index_params);
 ```
 
 **Parameters**
@@ -75,7 +75,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfFlatIndexParamsDestroy(cuvsIvfFlatIndexParams_t i
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## IVF-Flat index search parameters
 
@@ -102,7 +102,7 @@ struct cuvsIvfFlatSearchParams {
 Allocate IVF-Flat search params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfFlatSearchParamsCreate(cuvsIvfFlatSearchParams_t* params);
+cuvsError_t cuvsIvfFlatSearchParamsCreate(cuvsIvfFlatSearchParams_t* params);
 ```
 
 **Parameters**
@@ -113,7 +113,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfFlatSearchParamsCreate(cuvsIvfFlatSearchParams_t*
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfflatsearchparamsdestroy"></a>
 ### cuvsIvfFlatSearchParamsDestroy
@@ -121,7 +121,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfFlatSearchParamsCreate(cuvsIvfFlatSearchParams_t*
 De-allocate IVF-Flat search params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfFlatSearchParamsDestroy(cuvsIvfFlatSearchParams_t params);
+cuvsError_t cuvsIvfFlatSearchParamsDestroy(cuvsIvfFlatSearchParams_t params);
 ```
 
 **Parameters**
@@ -132,7 +132,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfFlatSearchParamsDestroy(cuvsIvfFlatSearchParams_t
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## IVF-Flat index
 
@@ -161,7 +161,7 @@ typedef struct {
 Allocate IVF-Flat index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfFlatIndexCreate(cuvsIvfFlatIndex_t* index);
+cuvsError_t cuvsIvfFlatIndexCreate(cuvsIvfFlatIndex_t* index);
 ```
 
 **Parameters**
@@ -172,7 +172,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfFlatIndexCreate(cuvsIvfFlatIndex_t* index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfflatindexdestroy"></a>
 ### cuvsIvfFlatIndexDestroy
@@ -180,7 +180,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfFlatIndexCreate(cuvsIvfFlatIndex_t* index);
 De-allocate IVF-Flat index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfFlatIndexDestroy(cuvsIvfFlatIndex_t index);
+cuvsError_t cuvsIvfFlatIndexDestroy(cuvsIvfFlatIndex_t index);
 ```
 
 **Parameters**
@@ -191,7 +191,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfFlatIndexDestroy(cuvsIvfFlatIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfflatindexgetnlists"></a>
 ### cuvsIvfFlatIndexGetNLists
@@ -199,7 +199,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfFlatIndexDestroy(cuvsIvfFlatIndex_t index);
 Get the number of clusters/inverted lists in the index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfFlatIndexGetNLists(cuvsIvfFlatIndex_t index, int64_t* n_lists);
+cuvsError_t cuvsIvfFlatIndexGetNLists(cuvsIvfFlatIndex_t index, int64_t* n_lists);
 ```
 
 **Parameters**
@@ -211,7 +211,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfFlatIndexGetNLists(cuvsIvfFlatIndex_t index, int6
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfflatindexgetdim"></a>
 ### cuvsIvfFlatIndexGetDim
@@ -219,7 +219,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfFlatIndexGetNLists(cuvsIvfFlatIndex_t index, int6
 Get the dimensionality of the indexed data
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfFlatIndexGetDim(cuvsIvfFlatIndex_t index, int64_t* dim);
+cuvsError_t cuvsIvfFlatIndexGetDim(cuvsIvfFlatIndex_t index, int64_t* dim);
 ```
 
 **Parameters**
@@ -231,7 +231,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfFlatIndexGetDim(cuvsIvfFlatIndex_t index, int64_t
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfflatindexgetcenters"></a>
 ### cuvsIvfFlatIndexGetCenters
@@ -239,7 +239,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfFlatIndexGetDim(cuvsIvfFlatIndex_t index, int64_t
 Get the cluster centers corresponding to the lists [n_lists, dim]
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfFlatIndexGetCenters(cuvsIvfFlatIndex_t index, DLManagedTensor* centers);
+cuvsError_t cuvsIvfFlatIndexGetCenters(cuvsIvfFlatIndex_t index, DLManagedTensor* centers);
 ```
 
 **Parameters**
@@ -251,17 +251,21 @@ CUVS_EXPORT cuvsError_t cuvsIvfFlatIndexGetCenters(cuvsIvfFlatIndex_t index, DLM
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## IVF-Flat index build
 
 <a id="cuvsivfflatbuild"></a>
 ### cuvsIvfFlatBuild
 
-Build a IVF-Flat index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`, or `kDLCPU`. Also, acceptable underlying types are: 1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` 2. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8` 3. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
+Build an IVF-Flat index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`, or `kDLCPU`. Also, acceptable underlying types are:
+
+1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
+2. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
+3. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfFlatBuild(cuvsResources_t res,
+cuvsError_t cuvsIvfFlatBuild(cuvsResources_t res,
 cuvsIvfFlatIndexParams_t index_params,
 DLManagedTensor* dataset,
 cuvsIvfFlatIndex_t index);
@@ -278,17 +282,21 @@ cuvsIvfFlatIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## IVF-Flat index search
 
 <a id="cuvsivfflatsearch"></a>
 ### cuvsIvfFlatSearch
 
-Search a IVF-Flat index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`. It is also important to note that the IVF-Flat Index must have been built with the same type of `queries`, such that `index.dtype.code == queries.dl_tensor.dtype.code` Types for input are: 1. `queries`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` 2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 32` 3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
+Search an IVF-Flat index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, or `kDLCUDAManaged`. It is also important to note that the IVF-Flat Index must have been built with the same type of `queries`, such that `index.dtype.code == queries.dl_tensor.dtype.code`. Types for input are:
+
+1. `queries`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
+2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 32`
+3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfFlatSearch(cuvsResources_t res,
+cuvsError_t cuvsIvfFlatSearch(cuvsResources_t res,
 cuvsIvfFlatSearchParams_t search_params,
 cuvsIvfFlatIndex_t index,
 DLManagedTensor* queries,
@@ -311,7 +319,7 @@ cuvsFilter filter);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## IVF-Flat C-API serialize functions
 
@@ -321,7 +329,7 @@ cuvsFilter filter);
 Save the index to file.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfFlatSerialize(cuvsResources_t res,
+cuvsError_t cuvsIvfFlatSerialize(cuvsResources_t res,
 const char* filename,
 cuvsIvfFlatIndex_t index);
 ```
@@ -338,7 +346,7 @@ Experimental, both the API and the serialization format are subject to change.
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfflatdeserialize"></a>
 ### cuvsIvfFlatDeserialize
@@ -346,7 +354,7 @@ Experimental, both the API and the serialization format are subject to change.
 Load index from file.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfFlatDeserialize(cuvsResources_t res,
+cuvsError_t cuvsIvfFlatDeserialize(cuvsResources_t res,
 const char* filename,
 cuvsIvfFlatIndex_t index);
 ```
@@ -363,7 +371,7 @@ Experimental, both the API and the serialization format are subject to change.
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## IVF-Flat index extend
 
@@ -373,7 +381,7 @@ Experimental, both the API and the serialization format are subject to change.
 Extend the index with the new data.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfFlatExtend(cuvsResources_t res,
+cuvsError_t cuvsIvfFlatExtend(cuvsResources_t res,
 DLManagedTensor* new_vectors,
 DLManagedTensor* new_indices,
 cuvsIvfFlatIndex_t index);
@@ -390,4 +398,4 @@ cuvsIvfFlatIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
diff --git a/fern/pages/c_api/c-api-neighbors-ivf-pq.md b/fern/pages/c_api/c-api-neighbors-ivf-pq.md
index f903f1169b..5fb9c21940 100644
--- a/fern/pages/c_api/c-api-neighbors-ivf-pq.md
+++ b/fern/pages/c_api/c-api-neighbors-ivf-pq.md
@@ -93,7 +93,7 @@ struct cuvsIvfPqIndexParams {
 Allocate IVF-PQ Index params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqIndexParamsCreate(cuvsIvfPqIndexParams_t* index_params);
+cuvsError_t cuvsIvfPqIndexParamsCreate(cuvsIvfPqIndexParams_t* index_params);
 ```
 
 **Parameters**
@@ -104,7 +104,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqIndexParamsCreate(cuvsIvfPqIndexParams_t* index
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfpqindexparamsdestroy"></a>
 ### cuvsIvfPqIndexParamsDestroy
@@ -112,7 +112,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqIndexParamsCreate(cuvsIvfPqIndexParams_t* index
 De-allocate IVF-PQ Index params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqIndexParamsDestroy(cuvsIvfPqIndexParams_t index_params);
+cuvsError_t cuvsIvfPqIndexParamsDestroy(cuvsIvfPqIndexParams_t index_params);
 ```
 
 **Parameters**
@@ -123,7 +123,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqIndexParamsDestroy(cuvsIvfPqIndexParams_t index
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## IVF-PQ index search parameters
 
@@ -160,7 +160,7 @@ struct cuvsIvfPqSearchParams {
 Allocate IVF-PQ search params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqSearchParamsCreate(cuvsIvfPqSearchParams_t* params);
+cuvsError_t cuvsIvfPqSearchParamsCreate(cuvsIvfPqSearchParams_t* params);
 ```
 
 **Parameters**
@@ -171,7 +171,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqSearchParamsCreate(cuvsIvfPqSearchParams_t* par
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfpqsearchparamsdestroy"></a>
 ### cuvsIvfPqSearchParamsDestroy
@@ -179,7 +179,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqSearchParamsCreate(cuvsIvfPqSearchParams_t* par
 De-allocate IVF-PQ search params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqSearchParamsDestroy(cuvsIvfPqSearchParams_t params);
+cuvsError_t cuvsIvfPqSearchParamsDestroy(cuvsIvfPqSearchParams_t params);
 ```
 
 **Parameters**
@@ -190,7 +190,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqSearchParamsDestroy(cuvsIvfPqSearchParams_t par
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## IVF-PQ index
 
@@ -219,7 +219,7 @@ typedef struct {
 Allocate IVF-PQ index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqIndexCreate(cuvsIvfPqIndex_t* index);
+cuvsError_t cuvsIvfPqIndexCreate(cuvsIvfPqIndex_t* index);
 ```
 
 **Parameters**
@@ -230,7 +230,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqIndexCreate(cuvsIvfPqIndex_t* index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfpqindexdestroy"></a>
 ### cuvsIvfPqIndexDestroy
@@ -238,7 +238,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqIndexCreate(cuvsIvfPqIndex_t* index);
 De-allocate IVF-PQ index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqIndexDestroy(cuvsIvfPqIndex_t index);
+cuvsError_t cuvsIvfPqIndexDestroy(cuvsIvfPqIndex_t index);
 ```
 
 **Parameters**
@@ -249,7 +249,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqIndexDestroy(cuvsIvfPqIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfpqindexgetnlists"></a>
 ### cuvsIvfPqIndexGetNLists
@@ -257,7 +257,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqIndexDestroy(cuvsIvfPqIndex_t index);
 Get the number of clusters/inverted lists
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetNLists(cuvsIvfPqIndex_t index, int64_t* n_lists);
+cuvsError_t cuvsIvfPqIndexGetNLists(cuvsIvfPqIndex_t index, int64_t* n_lists);
 ```
 
 **Parameters**
@@ -269,7 +269,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetNLists(cuvsIvfPqIndex_t index, int64_t*
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfpqindexgetdim"></a>
 ### cuvsIvfPqIndexGetDim
@@ -277,7 +277,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetNLists(cuvsIvfPqIndex_t index, int64_t*
 Get the dimensionality
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetDim(cuvsIvfPqIndex_t index, int64_t* dim);
+cuvsError_t cuvsIvfPqIndexGetDim(cuvsIvfPqIndex_t index, int64_t* dim);
 ```
 
 **Parameters**
@@ -289,7 +289,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetDim(cuvsIvfPqIndex_t index, int64_t* di
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfpqindexgetsize"></a>
 ### cuvsIvfPqIndexGetSize
@@ -297,7 +297,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetDim(cuvsIvfPqIndex_t index, int64_t* di
 Get the size of the index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetSize(cuvsIvfPqIndex_t index, int64_t* size);
+cuvsError_t cuvsIvfPqIndexGetSize(cuvsIvfPqIndex_t index, int64_t* size);
 ```
 
 **Parameters**
@@ -309,7 +309,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetSize(cuvsIvfPqIndex_t index, int64_t* s
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfpqindexgetpqdim"></a>
 ### cuvsIvfPqIndexGetPqDim
@@ -317,7 +317,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetSize(cuvsIvfPqIndex_t index, int64_t* s
 Get the dimensionality of an encoded vector after compression by PQ.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetPqDim(cuvsIvfPqIndex_t index, int64_t* pq_dim);
+cuvsError_t cuvsIvfPqIndexGetPqDim(cuvsIvfPqIndex_t index, int64_t* pq_dim);
 ```
 
 **Parameters**
@@ -329,7 +329,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetPqDim(cuvsIvfPqIndex_t index, int64_t*
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfpqindexgetpqbits"></a>
 ### cuvsIvfPqIndexGetPqBits
@@ -337,7 +337,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetPqDim(cuvsIvfPqIndex_t index, int64_t*
 Get the bit length of an encoded vector element after compression by PQ.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetPqBits(cuvsIvfPqIndex_t index, int64_t* pq_bits);
+cuvsError_t cuvsIvfPqIndexGetPqBits(cuvsIvfPqIndex_t index, int64_t* pq_bits);
 ```
 
 **Parameters**
@@ -349,7 +349,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetPqBits(cuvsIvfPqIndex_t index, int64_t*
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfpqindexgetpqlen"></a>
 ### cuvsIvfPqIndexGetPqLen
@@ -357,7 +357,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetPqBits(cuvsIvfPqIndex_t index, int64_t*
 Get the Dimensionality of a subspace, i.e. the number of vector
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetPqLen(cuvsIvfPqIndex_t index, int64_t* pq_len);
+cuvsError_t cuvsIvfPqIndexGetPqLen(cuvsIvfPqIndex_t index, int64_t* pq_len);
 ```
 
 components mapped to a subspace
@@ -371,7 +371,7 @@ components mapped to a subspace
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfpqindexgetcenters"></a>
 ### cuvsIvfPqIndexGetCenters
@@ -379,7 +379,7 @@ components mapped to a subspace
 Get the cluster centers corresponding to the lists in the original space
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetCenters(cuvsIvfPqIndex_t index, DLManagedTensor* centers);
+cuvsError_t cuvsIvfPqIndexGetCenters(cuvsIvfPqIndex_t index, DLManagedTensor* centers);
 ```
 
 **Parameters**
@@ -391,7 +391,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetCenters(cuvsIvfPqIndex_t index, DLManag
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfpqindexgetcenterspadded"></a>
 ### cuvsIvfPqIndexGetCentersPadded
@@ -399,7 +399,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetCenters(cuvsIvfPqIndex_t index, DLManag
 Get the padded cluster centers [n_lists, dim_ext] where dim_ext = round_up(dim + 1, 8)
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetCentersPadded(cuvsIvfPqIndex_t index, DLManagedTensor* centers);
+cuvsError_t cuvsIvfPqIndexGetCentersPadded(cuvsIvfPqIndex_t index, DLManagedTensor* centers);
 ```
 
 This returns the full padded centers as a contiguous array, suitable for use with cuvsIvfPqBuildPrecomputed.
@@ -413,7 +413,7 @@ This returns the full padded centers as a contiguous array, suitable for use wit
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfpqindexgetpqcenters"></a>
 ### cuvsIvfPqIndexGetPqCenters
@@ -421,7 +421,7 @@ This returns the full padded centers as a contiguous array, suitable for use wit
 Get the PQ cluster centers
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetPqCenters(cuvsIvfPqIndex_t index, DLManagedTensor* pq_centers);
+cuvsError_t cuvsIvfPqIndexGetPqCenters(cuvsIvfPqIndex_t index, DLManagedTensor* pq_centers);
 ```
 
 - CUVS_IVF_PQ_CODEBOOK_GEN_PER_SUBSPACE: [pq_dim , pq_len, pq_book_size]
@@ -436,7 +436,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetPqCenters(cuvsIvfPqIndex_t index, DLMan
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfpqindexgetcentersrot"></a>
 ### cuvsIvfPqIndexGetCentersRot
@@ -444,7 +444,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetPqCenters(cuvsIvfPqIndex_t index, DLMan
 Get the rotated cluster centers [n_lists, rot_dim] where rot_dim = pq_len * pq_dim
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetCentersRot(cuvsIvfPqIndex_t index, DLManagedTensor* centers_rot);
+cuvsError_t cuvsIvfPqIndexGetCentersRot(cuvsIvfPqIndex_t index, DLManagedTensor* centers_rot);
 ```
 
 **Parameters**
@@ -456,7 +456,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetCentersRot(cuvsIvfPqIndex_t index, DLMa
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfpqindexgetrotationmatrix"></a>
 ### cuvsIvfPqIndexGetRotationMatrix
@@ -464,7 +464,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetCentersRot(cuvsIvfPqIndex_t index, DLMa
 Get the rotation matrix [rot_dim, dim] Transform matrix (original space -&gt; rotated padded space)
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetRotationMatrix(cuvsIvfPqIndex_t index,
+cuvsError_t cuvsIvfPqIndexGetRotationMatrix(cuvsIvfPqIndex_t index,
 DLManagedTensor* rotation_matrix);
 ```
 
@@ -477,7 +477,7 @@ DLManagedTensor* rotation_matrix);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfpqindexgetlistsizes"></a>
 ### cuvsIvfPqIndexGetListSizes
@@ -485,7 +485,7 @@ DLManagedTensor* rotation_matrix);
 Get the sizes of each list
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetListSizes(cuvsIvfPqIndex_t index, DLManagedTensor* list_sizes);
+cuvsError_t cuvsIvfPqIndexGetListSizes(cuvsIvfPqIndex_t index, DLManagedTensor* list_sizes);
 ```
 
 **Parameters**
@@ -497,7 +497,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetListSizes(cuvsIvfPqIndex_t index, DLMan
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfpqindexunpackcontiguouslistdata"></a>
 ### cuvsIvfPqIndexUnpackContiguousListData
@@ -505,7 +505,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetListSizes(cuvsIvfPqIndex_t index, DLMan
 Unpack `n_rows` consecutive PQ encoded vectors of a single list (cluster) in the compressed index starting at given `offset`, not expanded to one code per byte. Each code in the output buffer occupies ceildiv(index.pq_dim() * index.pq_bits(), 8) bytes.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqIndexUnpackContiguousListData(cuvsResources_t res,
+cuvsError_t cuvsIvfPqIndexUnpackContiguousListData(cuvsResources_t res,
 cuvsIvfPqIndex_t index,
 DLManagedTensor* out_codes,
 uint32_t label,
@@ -524,7 +524,7 @@ uint32_t offset);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfpqindexgetlistindices"></a>
 ### cuvsIvfPqIndexGetListIndices
@@ -532,7 +532,7 @@ uint32_t offset);
 Get the indices of each vector in a ivf-pq list
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqIndexGetListIndices(cuvsIvfPqIndex_t index,
+cuvsError_t cuvsIvfPqIndexGetListIndices(cuvsIvfPqIndex_t index,
 uint32_t label,
 DLManagedTensor* out_labels);
 ```
@@ -547,17 +547,22 @@ DLManagedTensor* out_labels);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## IVF-PQ index build
 
 <a id="cuvsivfpqbuild"></a>
 ### cuvsIvfPqBuild
 
-Build a IVF-PQ index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`, or `kDLCPU`. Also, acceptable underlying types are: 1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` 2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16` 3. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8` 4. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
+Build an IVF-PQ index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`, or `kDLCPU`. Also, acceptable underlying types are:
+
+1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
+2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16`
+3. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
+4. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqBuild(cuvsResources_t res,
+cuvsError_t cuvsIvfPqBuild(cuvsResources_t res,
 cuvsIvfPqIndexParams_t params,
 DLManagedTensor* dataset,
 cuvsIvfPqIndex_t index);
@@ -574,7 +579,7 @@ cuvsIvfPqIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfpqbuildprecomputed"></a>
 ### cuvsIvfPqBuildPrecomputed
@@ -582,7 +587,7 @@ cuvsIvfPqIndex_t index);
 Build a view-type IVF-PQ index from device memory precomputed centroids and codebook.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqBuildPrecomputed(cuvsResources_t res,
+cuvsError_t cuvsIvfPqBuildPrecomputed(cuvsResources_t res,
 cuvsIvfPqIndexParams_t params,
 uint32_t dim,
 DLManagedTensor* pq_centers,
@@ -615,17 +620,21 @@ The index_params must be consistent with the provided matrices. Specifically:
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## IVF-PQ index search
 
 <a id="cuvsivfpqsearch"></a>
 ### cuvsIvfPqSearch
 
-Search a IVF-PQ index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`. It is also important to note that the IVF-PQ Index must have been built with the same type of `queries`, such that `index.dtype.code == queries.dl_tensor.dtype.code` Types for input are: 1. `queries`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` or `kDLDataType.bits = 16` 2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 32` 3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
+Search an IVF-PQ index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, or `kDLCUDAManaged`. It is also important to note that the IVF-PQ Index must have been built with the same type of `queries`, such that `index.dtype.code == queries.dl_tensor.dtype.code`. Types for input are:
+
+1. `queries`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` or `kDLDataType.bits = 16`
+2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 32`
+3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqSearch(cuvsResources_t res,
+cuvsError_t cuvsIvfPqSearch(cuvsResources_t res,
 cuvsIvfPqSearchParams_t search_params,
 cuvsIvfPqIndex_t index,
 DLManagedTensor* queries,
@@ -646,7 +655,7 @@ DLManagedTensor* distances);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## IVF-PQ C-API serialize functions
 
@@ -656,7 +665,7 @@ DLManagedTensor* distances);
 Save the index to file.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqSerialize(cuvsResources_t res, const char* filename, cuvsIvfPqIndex_t index);
+cuvsError_t cuvsIvfPqSerialize(cuvsResources_t res, const char* filename, cuvsIvfPqIndex_t index);
 ```
 
 Experimental, both the API and the serialization format are subject to change.
@@ -671,7 +680,7 @@ Experimental, both the API and the serialization format are subject to change.
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfpqdeserialize"></a>
 ### cuvsIvfPqDeserialize
@@ -679,7 +688,7 @@ Experimental, both the API and the serialization format are subject to change.
 Load index from file.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqDeserialize(cuvsResources_t res, const char* filename, cuvsIvfPqIndex_t index);
+cuvsError_t cuvsIvfPqDeserialize(cuvsResources_t res, const char* filename, cuvsIvfPqIndex_t index);
 ```
 
 Experimental, both the API and the serialization format are subject to change.
@@ -694,7 +703,7 @@ Experimental, both the API and the serialization format are subject to change.
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## IVF-PQ index extend
 
@@ -704,7 +713,7 @@ Experimental, both the API and the serialization format are subject to change.
 Extend the index with the new data.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqExtend(cuvsResources_t res,
+cuvsError_t cuvsIvfPqExtend(cuvsResources_t res,
 DLManagedTensor* new_vectors,
 DLManagedTensor* new_indices,
 cuvsIvfPqIndex_t index);
@@ -721,7 +730,7 @@ cuvsIvfPqIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## IVF-PQ index transform
 
@@ -731,7 +740,7 @@ cuvsIvfPqIndex_t index);
 Transform the input data by applying pq-encoding
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfPqTransform(cuvsResources_t res,
+cuvsError_t cuvsIvfPqTransform(cuvsResources_t res,
 cuvsIvfPqIndex_t index,
 DLManagedTensor* input_dataset,
 DLManagedTensor* output_labels,
@@ -750,4 +759,4 @@ DLManagedTensor* output_dataset);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
diff --git a/fern/pages/c_api/c-api-neighbors-ivf-sq.md b/fern/pages/c_api/c-api-neighbors-ivf-sq.md
index d339c813af..a06794cb08 100644
--- a/fern/pages/c_api/c-api-neighbors-ivf-sq.md
+++ b/fern/pages/c_api/c-api-neighbors-ivf-sq.md
@@ -43,7 +43,7 @@ struct cuvsIvfSqIndexParams {
 Allocate IVF-SQ Index params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfSqIndexParamsCreate(cuvsIvfSqIndexParams_t* index_params);
+cuvsError_t cuvsIvfSqIndexParamsCreate(cuvsIvfSqIndexParams_t* index_params);
 ```
 
 **Parameters**
@@ -54,7 +54,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfSqIndexParamsCreate(cuvsIvfSqIndexParams_t* index
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfsqindexparamsdestroy"></a>
 ### cuvsIvfSqIndexParamsDestroy
@@ -62,7 +62,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfSqIndexParamsCreate(cuvsIvfSqIndexParams_t* index
 De-allocate IVF-SQ Index params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfSqIndexParamsDestroy(cuvsIvfSqIndexParams_t index_params);
+cuvsError_t cuvsIvfSqIndexParamsDestroy(cuvsIvfSqIndexParams_t index_params);
 ```
 
 **Parameters**
@@ -73,7 +73,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfSqIndexParamsDestroy(cuvsIvfSqIndexParams_t index
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## IVF-SQ index search parameters
 
@@ -100,7 +100,7 @@ struct cuvsIvfSqSearchParams {
 Allocate IVF-SQ search params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfSqSearchParamsCreate(cuvsIvfSqSearchParams_t* params);
+cuvsError_t cuvsIvfSqSearchParamsCreate(cuvsIvfSqSearchParams_t* params);
 ```
 
 **Parameters**
@@ -111,7 +111,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfSqSearchParamsCreate(cuvsIvfSqSearchParams_t* par
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfsqsearchparamsdestroy"></a>
 ### cuvsIvfSqSearchParamsDestroy
@@ -119,7 +119,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfSqSearchParamsCreate(cuvsIvfSqSearchParams_t* par
 De-allocate IVF-SQ search params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfSqSearchParamsDestroy(cuvsIvfSqSearchParams_t params);
+cuvsError_t cuvsIvfSqSearchParamsDestroy(cuvsIvfSqSearchParams_t params);
 ```
 
 **Parameters**
@@ -130,7 +130,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfSqSearchParamsDestroy(cuvsIvfSqSearchParams_t par
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## IVF-SQ index
 
@@ -159,7 +159,7 @@ typedef struct {
 Allocate IVF-SQ index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfSqIndexCreate(cuvsIvfSqIndex_t* index);
+cuvsError_t cuvsIvfSqIndexCreate(cuvsIvfSqIndex_t* index);
 ```
 
 **Parameters**
@@ -170,7 +170,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfSqIndexCreate(cuvsIvfSqIndex_t* index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfsqindexdestroy"></a>
 ### cuvsIvfSqIndexDestroy
@@ -178,7 +178,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfSqIndexCreate(cuvsIvfSqIndex_t* index);
 De-allocate IVF-SQ index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfSqIndexDestroy(cuvsIvfSqIndex_t index);
+cuvsError_t cuvsIvfSqIndexDestroy(cuvsIvfSqIndex_t index);
 ```
 
 **Parameters**
@@ -189,7 +189,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfSqIndexDestroy(cuvsIvfSqIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfsqindexgetnlists"></a>
 ### cuvsIvfSqIndexGetNLists
@@ -197,7 +197,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfSqIndexDestroy(cuvsIvfSqIndex_t index);
 Get the number of clusters/inverted lists
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfSqIndexGetNLists(cuvsIvfSqIndex_t index, int64_t* n_lists);
+cuvsError_t cuvsIvfSqIndexGetNLists(cuvsIvfSqIndex_t index, int64_t* n_lists);
 ```
 
 **Parameters**
@@ -209,7 +209,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfSqIndexGetNLists(cuvsIvfSqIndex_t index, int64_t*
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfsqindexgetdim"></a>
 ### cuvsIvfSqIndexGetDim
@@ -217,7 +217,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfSqIndexGetNLists(cuvsIvfSqIndex_t index, int64_t*
 Get the dimensionality of the data
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfSqIndexGetDim(cuvsIvfSqIndex_t index, int64_t* dim);
+cuvsError_t cuvsIvfSqIndexGetDim(cuvsIvfSqIndex_t index, int64_t* dim);
 ```
 
 **Parameters**
@@ -229,7 +229,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfSqIndexGetDim(cuvsIvfSqIndex_t index, int64_t* di
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfsqindexgetsize"></a>
 ### cuvsIvfSqIndexGetSize
@@ -237,7 +237,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfSqIndexGetDim(cuvsIvfSqIndex_t index, int64_t* di
 Get the size of the index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfSqIndexGetSize(cuvsIvfSqIndex_t index, int64_t* size);
+cuvsError_t cuvsIvfSqIndexGetSize(cuvsIvfSqIndex_t index, int64_t* size);
 ```
 
 **Parameters**
@@ -249,7 +249,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfSqIndexGetSize(cuvsIvfSqIndex_t index, int64_t* s
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfsqindexgetcenters"></a>
 ### cuvsIvfSqIndexGetCenters
@@ -257,7 +257,7 @@ CUVS_EXPORT cuvsError_t cuvsIvfSqIndexGetSize(cuvsIvfSqIndex_t index, int64_t* s
 Get the cluster centers corresponding to the lists [n_lists, dim]
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfSqIndexGetCenters(cuvsIvfSqIndex_t index, DLManagedTensor* centers);
+cuvsError_t cuvsIvfSqIndexGetCenters(cuvsIvfSqIndex_t index, DLManagedTensor* centers);
 ```
 
 **Parameters**
@@ -269,17 +269,20 @@ CUVS_EXPORT cuvsError_t cuvsIvfSqIndexGetCenters(cuvsIvfSqIndex_t index, DLManag
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## IVF-SQ index build
 
 <a id="cuvsivfsqbuild"></a>
 ### cuvsIvfSqBuild
 
-Build an IVF-SQ index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`, or `kDLCPU`. Also, acceptable underlying types are: 1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` 2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16`
+Build an IVF-SQ index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`, or `kDLCPU`. Also, acceptable underlying types are:
+
+1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
+2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16`
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfSqBuild(cuvsResources_t res,
+cuvsError_t cuvsIvfSqBuild(cuvsResources_t res,
 cuvsIvfSqIndexParams_t index_params,
 DLManagedTensor* dataset,
 cuvsIvfSqIndex_t index);
@@ -296,17 +299,21 @@ cuvsIvfSqIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## IVF-SQ index search
 
 <a id="cuvsivfsqsearch"></a>
 ### cuvsIvfSqSearch
 
-Search an IVF-SQ index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`. Types for input are: 1. `queries`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` or 16 2. `neighbors`: `kDLDataType.code == kDLInt` and `kDLDataType.bits = 64` 3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
+Search an IVF-SQ index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, or `kDLCUDAManaged`. Types for input are:
+
+1. `queries`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` or `kDLDataType.bits = 16`
+2. `neighbors`: `kDLDataType.code == kDLInt` and `kDLDataType.bits = 64`
+3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfSqSearch(cuvsResources_t res,
+cuvsError_t cuvsIvfSqSearch(cuvsResources_t res,
 cuvsIvfSqSearchParams_t search_params,
 cuvsIvfSqIndex_t index,
 DLManagedTensor* queries,
@@ -329,7 +336,7 @@ cuvsFilter filter);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## IVF-SQ C-API serialize functions
 
@@ -339,7 +346,7 @@ cuvsFilter filter);
 Save the index to file.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfSqSerialize(cuvsResources_t res, const char* filename, cuvsIvfSqIndex_t index);
+cuvsError_t cuvsIvfSqSerialize(cuvsResources_t res, const char* filename, cuvsIvfSqIndex_t index);
 ```
 
 Experimental, both the API and the serialization format are subject to change.
@@ -354,7 +361,7 @@ Experimental, both the API and the serialization format are subject to change.
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsivfsqdeserialize"></a>
 ### cuvsIvfSqDeserialize
@@ -362,7 +369,7 @@ Experimental, both the API and the serialization format are subject to change.
 Load index from file.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfSqDeserialize(cuvsResources_t res,
+cuvsError_t cuvsIvfSqDeserialize(cuvsResources_t res,
 const char* filename,
 cuvsIvfSqIndex_t index);
 ```
@@ -379,7 +386,7 @@ Experimental, both the API and the serialization format are subject to change.
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## IVF-SQ index extend
 
@@ -389,7 +396,7 @@ Experimental, both the API and the serialization format are subject to change.
 Extend the index with the new data.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsIvfSqExtend(cuvsResources_t res,
+cuvsError_t cuvsIvfSqExtend(cuvsResources_t res,
 DLManagedTensor* new_vectors,
 DLManagedTensor* new_indices,
 cuvsIvfSqIndex_t index);
@@ -406,4 +413,4 @@ cuvsIvfSqIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
diff --git a/fern/pages/c_api/c-api-neighbors-mg-cagra.md b/fern/pages/c_api/c-api-neighbors-mg-cagra.md
index 9f05edb51e..4bcb60dee4 100644
--- a/fern/pages/c_api/c-api-neighbors-mg-cagra.md
+++ b/fern/pages/c_api/c-api-neighbors-mg-cagra.md
@@ -35,7 +35,7 @@ struct cuvsMultiGpuCagraIndexParams {
 Allocate Multi-GPU CAGRA Index params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuCagraIndexParamsCreate(cuvsMultiGpuCagraIndexParams_t* index_params);
+cuvsError_t cuvsMultiGpuCagraIndexParamsCreate(cuvsMultiGpuCagraIndexParams_t* index_params);
 ```
 
 **Parameters**
@@ -46,7 +46,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuCagraIndexParamsCreate(cuvsMultiGpuCagraInde
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsmultigpucagraindexparamsdestroy"></a>
 ### cuvsMultiGpuCagraIndexParamsDestroy
@@ -54,7 +54,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuCagraIndexParamsCreate(cuvsMultiGpuCagraInde
 De-allocate Multi-GPU CAGRA Index params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuCagraIndexParamsDestroy(cuvsMultiGpuCagraIndexParams_t index_params);
+cuvsError_t cuvsMultiGpuCagraIndexParamsDestroy(cuvsMultiGpuCagraIndexParams_t index_params);
 ```
 
 **Parameters**
@@ -65,7 +65,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuCagraIndexParamsDestroy(cuvsMultiGpuCagraInd
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Multi-GPU CAGRA index search parameters
 
@@ -100,7 +100,7 @@ struct cuvsMultiGpuCagraSearchParams {
 Allocate Multi-GPU CAGRA search params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuCagraSearchParamsCreate(cuvsMultiGpuCagraSearchParams_t* params);
+cuvsError_t cuvsMultiGpuCagraSearchParamsCreate(cuvsMultiGpuCagraSearchParams_t* params);
 ```
 
 **Parameters**
@@ -111,7 +111,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuCagraSearchParamsCreate(cuvsMultiGpuCagraSea
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsmultigpucagrasearchparamsdestroy"></a>
 ### cuvsMultiGpuCagraSearchParamsDestroy
@@ -119,7 +119,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuCagraSearchParamsCreate(cuvsMultiGpuCagraSea
 De-allocate Multi-GPU CAGRA search params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuCagraSearchParamsDestroy(cuvsMultiGpuCagraSearchParams_t params);
+cuvsError_t cuvsMultiGpuCagraSearchParamsDestroy(cuvsMultiGpuCagraSearchParams_t params);
 ```
 
 **Parameters**
@@ -130,7 +130,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuCagraSearchParamsDestroy(cuvsMultiGpuCagraSe
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Multi-GPU CAGRA index
 
@@ -159,7 +159,7 @@ typedef struct {
 Allocate Multi-GPU CAGRA index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuCagraIndexCreate(cuvsMultiGpuCagraIndex_t* index);
+cuvsError_t cuvsMultiGpuCagraIndexCreate(cuvsMultiGpuCagraIndex_t* index);
 ```
 
 **Parameters**
@@ -170,7 +170,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuCagraIndexCreate(cuvsMultiGpuCagraIndex_t* i
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsmultigpucagraindexdestroy"></a>
 ### cuvsMultiGpuCagraIndexDestroy
@@ -178,7 +178,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuCagraIndexCreate(cuvsMultiGpuCagraIndex_t* i
 De-allocate Multi-GPU CAGRA index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuCagraIndexDestroy(cuvsMultiGpuCagraIndex_t index);
+cuvsError_t cuvsMultiGpuCagraIndexDestroy(cuvsMultiGpuCagraIndex_t index);
 ```
 
 **Parameters**
@@ -189,7 +189,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuCagraIndexDestroy(cuvsMultiGpuCagraIndex_t i
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Multi-GPU CAGRA index build
 
@@ -199,7 +199,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuCagraIndexDestroy(cuvsMultiGpuCagraIndex_t i
 Build a Multi-GPU CAGRA index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuCagraBuild(cuvsResources_t res,
+cuvsError_t cuvsMultiGpuCagraBuild(cuvsResources_t res,
 cuvsMultiGpuCagraIndexParams_t params,
 DLManagedTensor* dataset_tensor,
 cuvsMultiGpuCagraIndex_t index);
@@ -216,7 +216,7 @@ cuvsMultiGpuCagraIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Multi-GPU CAGRA index search
 
@@ -226,7 +226,7 @@ cuvsMultiGpuCagraIndex_t index);
 Search a Multi-GPU CAGRA index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuCagraSearch(cuvsResources_t res,
+cuvsError_t cuvsMultiGpuCagraSearch(cuvsResources_t res,
 cuvsMultiGpuCagraSearchParams_t params,
 cuvsMultiGpuCagraIndex_t index,
 DLManagedTensor* queries_tensor,
@@ -247,7 +247,7 @@ DLManagedTensor* distances_tensor);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Multi-GPU CAGRA index extend
 
@@ -257,7 +257,7 @@ DLManagedTensor* distances_tensor);
 Extend a Multi-GPU CAGRA index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuCagraExtend(cuvsResources_t res,
+cuvsError_t cuvsMultiGpuCagraExtend(cuvsResources_t res,
 cuvsMultiGpuCagraIndex_t index,
 DLManagedTensor* new_vectors_tensor,
 DLManagedTensor* new_indices_tensor);
@@ -274,7 +274,7 @@ DLManagedTensor* new_indices_tensor);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Multi-GPU CAGRA index serialize
 
@@ -284,7 +284,7 @@ DLManagedTensor* new_indices_tensor);
 Serialize a Multi-GPU CAGRA index to file
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuCagraSerialize(cuvsResources_t res,
+cuvsError_t cuvsMultiGpuCagraSerialize(cuvsResources_t res,
 cuvsMultiGpuCagraIndex_t index,
 const char* filename);
 ```
@@ -299,7 +299,7 @@ const char* filename);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Multi-GPU CAGRA index deserialize
 
@@ -309,7 +309,7 @@ const char* filename);
 Deserialize a Multi-GPU CAGRA index from file
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuCagraDeserialize(cuvsResources_t res,
+cuvsError_t cuvsMultiGpuCagraDeserialize(cuvsResources_t res,
 const char* filename,
 cuvsMultiGpuCagraIndex_t index);
 ```
@@ -324,7 +324,7 @@ cuvsMultiGpuCagraIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Multi-GPU CAGRA index distribute
 
@@ -334,7 +334,7 @@ cuvsMultiGpuCagraIndex_t index);
 Distribute a local CAGRA index to create a Multi-GPU index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuCagraDistribute(cuvsResources_t res,
+cuvsError_t cuvsMultiGpuCagraDistribute(cuvsResources_t res,
 const char* filename,
 cuvsMultiGpuCagraIndex_t index);
 ```
@@ -349,4 +349,4 @@ cuvsMultiGpuCagraIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
diff --git a/fern/pages/c_api/c-api-neighbors-mg-ivf-flat.md b/fern/pages/c_api/c-api-neighbors-mg-ivf-flat.md
index 8b578c708c..afcfe6bac8 100644
--- a/fern/pages/c_api/c-api-neighbors-mg-ivf-flat.md
+++ b/fern/pages/c_api/c-api-neighbors-mg-ivf-flat.md
@@ -35,7 +35,7 @@ struct cuvsMultiGpuIvfFlatIndexParams {
 Allocate Multi-GPU IVF-Flat Index params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfFlatIndexParamsCreate(cuvsMultiGpuIvfFlatIndexParams_t* index_params);
+cuvsError_t cuvsMultiGpuIvfFlatIndexParamsCreate(cuvsMultiGpuIvfFlatIndexParams_t* index_params);
 ```
 
 **Parameters**
@@ -46,7 +46,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfFlatIndexParamsCreate(cuvsMultiGpuIvfFlat
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsmultigpuivfflatindexparamsdestroy"></a>
 ### cuvsMultiGpuIvfFlatIndexParamsDestroy
@@ -54,7 +54,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfFlatIndexParamsCreate(cuvsMultiGpuIvfFlat
 De-allocate Multi-GPU IVF-Flat Index params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfFlatIndexParamsDestroy(cuvsMultiGpuIvfFlatIndexParams_t index_params);
+cuvsError_t cuvsMultiGpuIvfFlatIndexParamsDestroy(cuvsMultiGpuIvfFlatIndexParams_t index_params);
 ```
 
 **Parameters**
@@ -65,7 +65,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfFlatIndexParamsDestroy(cuvsMultiGpuIvfFla
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Multi-GPU IVF-Flat index search parameters
 
@@ -100,7 +100,7 @@ struct cuvsMultiGpuIvfFlatSearchParams {
 Allocate Multi-GPU IVF-Flat search params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfFlatSearchParamsCreate(cuvsMultiGpuIvfFlatSearchParams_t* params);
+cuvsError_t cuvsMultiGpuIvfFlatSearchParamsCreate(cuvsMultiGpuIvfFlatSearchParams_t* params);
 ```
 
 **Parameters**
@@ -111,7 +111,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfFlatSearchParamsCreate(cuvsMultiGpuIvfFla
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsmultigpuivfflatsearchparamsdestroy"></a>
 ### cuvsMultiGpuIvfFlatSearchParamsDestroy
@@ -119,7 +119,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfFlatSearchParamsCreate(cuvsMultiGpuIvfFla
 De-allocate Multi-GPU IVF-Flat search params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfFlatSearchParamsDestroy(cuvsMultiGpuIvfFlatSearchParams_t params);
+cuvsError_t cuvsMultiGpuIvfFlatSearchParamsDestroy(cuvsMultiGpuIvfFlatSearchParams_t params);
 ```
 
 **Parameters**
@@ -130,7 +130,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfFlatSearchParamsDestroy(cuvsMultiGpuIvfFl
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Multi-GPU IVF-Flat index
 
@@ -159,7 +159,7 @@ typedef struct {
 Allocate Multi-GPU IVF-Flat index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfFlatIndexCreate(cuvsMultiGpuIvfFlatIndex_t* index);
+cuvsError_t cuvsMultiGpuIvfFlatIndexCreate(cuvsMultiGpuIvfFlatIndex_t* index);
 ```
 
 **Parameters**
@@ -170,7 +170,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfFlatIndexCreate(cuvsMultiGpuIvfFlatIndex_
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsmultigpuivfflatindexdestroy"></a>
 ### cuvsMultiGpuIvfFlatIndexDestroy
@@ -178,7 +178,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfFlatIndexCreate(cuvsMultiGpuIvfFlatIndex_
 De-allocate Multi-GPU IVF-Flat index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfFlatIndexDestroy(cuvsMultiGpuIvfFlatIndex_t index);
+cuvsError_t cuvsMultiGpuIvfFlatIndexDestroy(cuvsMultiGpuIvfFlatIndex_t index);
 ```
 
 **Parameters**
@@ -189,7 +189,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfFlatIndexDestroy(cuvsMultiGpuIvfFlatIndex
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Multi-GPU IVF-Flat index build
 
@@ -199,7 +199,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfFlatIndexDestroy(cuvsMultiGpuIvfFlatIndex
 Build a Multi-GPU IVF-Flat index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfFlatBuild(cuvsResources_t res,
+cuvsError_t cuvsMultiGpuIvfFlatBuild(cuvsResources_t res,
 cuvsMultiGpuIvfFlatIndexParams_t params,
 DLManagedTensor* dataset_tensor,
 cuvsMultiGpuIvfFlatIndex_t index);
@@ -216,7 +216,7 @@ cuvsMultiGpuIvfFlatIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Multi-GPU IVF-Flat index search
 
@@ -226,7 +226,7 @@ cuvsMultiGpuIvfFlatIndex_t index);
 Search a Multi-GPU IVF-Flat index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfFlatSearch(cuvsResources_t res,
+cuvsError_t cuvsMultiGpuIvfFlatSearch(cuvsResources_t res,
 cuvsMultiGpuIvfFlatSearchParams_t params,
 cuvsMultiGpuIvfFlatIndex_t index,
 DLManagedTensor* queries_tensor,
@@ -247,7 +247,7 @@ DLManagedTensor* distances_tensor);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Multi-GPU IVF-Flat index extend
 
@@ -257,7 +257,7 @@ DLManagedTensor* distances_tensor);
 Extend a Multi-GPU IVF-Flat index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfFlatExtend(cuvsResources_t res,
+cuvsError_t cuvsMultiGpuIvfFlatExtend(cuvsResources_t res,
 cuvsMultiGpuIvfFlatIndex_t index,
 DLManagedTensor* new_vectors_tensor,
 DLManagedTensor* new_indices_tensor);
@@ -274,7 +274,7 @@ DLManagedTensor* new_indices_tensor);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Multi-GPU IVF-Flat index serialize
 
@@ -284,7 +284,7 @@ DLManagedTensor* new_indices_tensor);
 Serialize a Multi-GPU IVF-Flat index to file
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfFlatSerialize(cuvsResources_t res,
+cuvsError_t cuvsMultiGpuIvfFlatSerialize(cuvsResources_t res,
 cuvsMultiGpuIvfFlatIndex_t index,
 const char* filename);
 ```
@@ -299,7 +299,7 @@ const char* filename);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Multi-GPU IVF-Flat index deserialize
 
@@ -309,7 +309,7 @@ const char* filename);
 Deserialize a Multi-GPU IVF-Flat index from file
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfFlatDeserialize(cuvsResources_t res,
+cuvsError_t cuvsMultiGpuIvfFlatDeserialize(cuvsResources_t res,
 const char* filename,
 cuvsMultiGpuIvfFlatIndex_t index);
 ```
@@ -324,7 +324,7 @@ cuvsMultiGpuIvfFlatIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Multi-GPU IVF-Flat index distribute
 
@@ -334,7 +334,7 @@ cuvsMultiGpuIvfFlatIndex_t index);
 Distribute a local IVF-Flat index to create a Multi-GPU index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfFlatDistribute(cuvsResources_t res,
+cuvsError_t cuvsMultiGpuIvfFlatDistribute(cuvsResources_t res,
 const char* filename,
 cuvsMultiGpuIvfFlatIndex_t index);
 ```
@@ -349,4 +349,4 @@ cuvsMultiGpuIvfFlatIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
diff --git a/fern/pages/c_api/c-api-neighbors-mg-ivf-pq.md b/fern/pages/c_api/c-api-neighbors-mg-ivf-pq.md
index ad30dcc36e..8452b48872 100644
--- a/fern/pages/c_api/c-api-neighbors-mg-ivf-pq.md
+++ b/fern/pages/c_api/c-api-neighbors-mg-ivf-pq.md
@@ -35,7 +35,7 @@ struct cuvsMultiGpuIvfPqIndexParams {
 Allocate Multi-GPU IVF-PQ Index params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfPqIndexParamsCreate(cuvsMultiGpuIvfPqIndexParams_t* index_params);
+cuvsError_t cuvsMultiGpuIvfPqIndexParamsCreate(cuvsMultiGpuIvfPqIndexParams_t* index_params);
 ```
 
 **Parameters**
@@ -46,7 +46,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfPqIndexParamsCreate(cuvsMultiGpuIvfPqInde
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsmultigpuivfpqindexparamsdestroy"></a>
 ### cuvsMultiGpuIvfPqIndexParamsDestroy
@@ -54,7 +54,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfPqIndexParamsCreate(cuvsMultiGpuIvfPqInde
 De-allocate Multi-GPU IVF-PQ Index params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfPqIndexParamsDestroy(cuvsMultiGpuIvfPqIndexParams_t index_params);
+cuvsError_t cuvsMultiGpuIvfPqIndexParamsDestroy(cuvsMultiGpuIvfPqIndexParams_t index_params);
 ```
 
 **Parameters**
@@ -65,7 +65,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfPqIndexParamsDestroy(cuvsMultiGpuIvfPqInd
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Multi-GPU IVF-PQ index search parameters
 
@@ -100,7 +100,7 @@ struct cuvsMultiGpuIvfPqSearchParams {
 Allocate Multi-GPU IVF-PQ search params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfPqSearchParamsCreate(cuvsMultiGpuIvfPqSearchParams_t* params);
+cuvsError_t cuvsMultiGpuIvfPqSearchParamsCreate(cuvsMultiGpuIvfPqSearchParams_t* params);
 ```
 
 **Parameters**
@@ -111,7 +111,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfPqSearchParamsCreate(cuvsMultiGpuIvfPqSea
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsmultigpuivfpqsearchparamsdestroy"></a>
 ### cuvsMultiGpuIvfPqSearchParamsDestroy
@@ -119,7 +119,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfPqSearchParamsCreate(cuvsMultiGpuIvfPqSea
 De-allocate Multi-GPU IVF-PQ search params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfPqSearchParamsDestroy(cuvsMultiGpuIvfPqSearchParams_t params);
+cuvsError_t cuvsMultiGpuIvfPqSearchParamsDestroy(cuvsMultiGpuIvfPqSearchParams_t params);
 ```
 
 **Parameters**
@@ -130,7 +130,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfPqSearchParamsDestroy(cuvsMultiGpuIvfPqSe
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Multi-GPU IVF-PQ index
 
@@ -159,7 +159,7 @@ typedef struct {
 Allocate Multi-GPU IVF-PQ index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfPqIndexCreate(cuvsMultiGpuIvfPqIndex_t* index);
+cuvsError_t cuvsMultiGpuIvfPqIndexCreate(cuvsMultiGpuIvfPqIndex_t* index);
 ```
 
 **Parameters**
@@ -170,7 +170,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfPqIndexCreate(cuvsMultiGpuIvfPqIndex_t* i
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsmultigpuivfpqindexdestroy"></a>
 ### cuvsMultiGpuIvfPqIndexDestroy
@@ -178,7 +178,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfPqIndexCreate(cuvsMultiGpuIvfPqIndex_t* i
 De-allocate Multi-GPU IVF-PQ index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfPqIndexDestroy(cuvsMultiGpuIvfPqIndex_t index);
+cuvsError_t cuvsMultiGpuIvfPqIndexDestroy(cuvsMultiGpuIvfPqIndex_t index);
 ```
 
 **Parameters**
@@ -189,7 +189,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfPqIndexDestroy(cuvsMultiGpuIvfPqIndex_t i
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Multi-GPU IVF-PQ index build
 
@@ -199,7 +199,7 @@ CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfPqIndexDestroy(cuvsMultiGpuIvfPqIndex_t i
 Build a Multi-GPU IVF-PQ index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfPqBuild(cuvsResources_t res,
+cuvsError_t cuvsMultiGpuIvfPqBuild(cuvsResources_t res,
 cuvsMultiGpuIvfPqIndexParams_t params,
 DLManagedTensor* dataset_tensor,
 cuvsMultiGpuIvfPqIndex_t index);
@@ -216,7 +216,7 @@ cuvsMultiGpuIvfPqIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Multi-GPU IVF-PQ index search
 
@@ -226,7 +226,7 @@ cuvsMultiGpuIvfPqIndex_t index);
 Search a Multi-GPU IVF-PQ index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfPqSearch(cuvsResources_t res,
+cuvsError_t cuvsMultiGpuIvfPqSearch(cuvsResources_t res,
 cuvsMultiGpuIvfPqSearchParams_t params,
 cuvsMultiGpuIvfPqIndex_t index,
 DLManagedTensor* queries_tensor,
@@ -247,7 +247,7 @@ DLManagedTensor* distances_tensor);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Multi-GPU IVF-PQ index extend
 
@@ -257,7 +257,7 @@ DLManagedTensor* distances_tensor);
 Extend a Multi-GPU IVF-PQ index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfPqExtend(cuvsResources_t res,
+cuvsError_t cuvsMultiGpuIvfPqExtend(cuvsResources_t res,
 cuvsMultiGpuIvfPqIndex_t index,
 DLManagedTensor* new_vectors_tensor,
 DLManagedTensor* new_indices_tensor);
@@ -274,7 +274,7 @@ DLManagedTensor* new_indices_tensor);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Multi-GPU IVF-PQ index serialize
 
@@ -284,7 +284,7 @@ DLManagedTensor* new_indices_tensor);
 Serialize a Multi-GPU IVF-PQ index to file
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfPqSerialize(cuvsResources_t res,
+cuvsError_t cuvsMultiGpuIvfPqSerialize(cuvsResources_t res,
 cuvsMultiGpuIvfPqIndex_t index,
 const char* filename);
 ```
@@ -299,7 +299,7 @@ const char* filename);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Multi-GPU IVF-PQ index deserialize
 
@@ -309,7 +309,7 @@ const char* filename);
 Deserialize a Multi-GPU IVF-PQ index from file
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfPqDeserialize(cuvsResources_t res,
+cuvsError_t cuvsMultiGpuIvfPqDeserialize(cuvsResources_t res,
 const char* filename,
 cuvsMultiGpuIvfPqIndex_t index);
 ```
@@ -324,7 +324,7 @@ cuvsMultiGpuIvfPqIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Multi-GPU IVF-PQ index distribute
 
@@ -334,7 +334,7 @@ cuvsMultiGpuIvfPqIndex_t index);
 Distribute a local IVF-PQ index to create a Multi-GPU index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsMultiGpuIvfPqDistribute(cuvsResources_t res,
+cuvsError_t cuvsMultiGpuIvfPqDistribute(cuvsResources_t res,
 const char* filename,
 cuvsMultiGpuIvfPqIndex_t index);
 ```
@@ -349,4 +349,4 @@ cuvsMultiGpuIvfPqIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
diff --git a/fern/pages/c_api/c-api-neighbors-nn-descent.md b/fern/pages/c_api/c-api-neighbors-nn-descent.md
index 689dca1fba..cba7535ff5 100644
--- a/fern/pages/c_api/c-api-neighbors-nn-descent.md
+++ b/fern/pages/c_api/c-api-neighbors-nn-descent.md
@@ -11,7 +11,7 @@ _Source header: `cuvs/neighbors/nn_descent.h`_
 <a id="cuvsnndescentdistcompdtype"></a>
 ### cuvsNNDescentDistCompDtype
 
-Dtype to use for distance computation - `NND_DIST_COMP_AUTO`: Automatically determine the best dtype for distance computation based on the dataset dimensions. - `NND_DIST_COMP_FP32`: Use fp32 distance computation for better precision at the cost of performance and memory usage. - `NND_DIST_COMP_FP16`: Use fp16 distance computation.
+Dtype to use for distance computation
 
 ```c
 typedef enum {
@@ -23,11 +23,11 @@ typedef enum {
 
 **Values**
 
-| Name | Value |
-| --- | --- |
-| `NND_DIST_COMP_AUTO` | `0` |
-| `NND_DIST_COMP_FP32` | `1` |
-| `NND_DIST_COMP_FP16` | `2` |
+| Name | Value | Description |
+| --- | --- | --- |
+| `NND_DIST_COMP_AUTO` | `0` | Automatically determine the best dtype for distance computation based on the dataset dimensions. |
+| `NND_DIST_COMP_FP32` | `1` | Use fp32 distance computation for better precision at the cost of performance and memory usage. |
+| `NND_DIST_COMP_FP16` | `2` | Use fp16 distance computation. |
 
 ## The nn-descent algorithm parameters.
 
@@ -60,7 +60,7 @@ struct cuvsNNDescentIndexParams {
 | `max_iterations` | `size_t` | The number of iterations that nn-descent will refine the graph for. More iterations produce a better quality graph at cost of performance |
 | `termination_threshold` | `float` | The delta at which nn-descent will terminate its iterations |
 | `return_distances` | `bool` | Boolean to decide whether to return distances array |
-| `dist_comp_dtype` | [`cuvsNNDescentDistCompDtype`](/api-reference/c-api-neighbors-nn-descent#cuvsnndescentdistcompdtype) | dtype to use for distance computation. Defaults to `NND_DIST_COMP_AUTO` which automatically determines the best dtype for distance computation based on the dataset dimensions. Use `NND_DIST_COMP_FP32` for better precision at the cost of performance and memory usage. This option is only valid when data type is fp32. Use `NND_DIST_COMP_FP16` for better performance and memory usage at the cost of precision. |
+| `dist_comp_dtype` | [`cuvsNNDescentDistCompDtype`](/api-reference/c-api-neighbors-nn-descent#cuvsnndescentdistcompdtype) | dtype to use for distance computation.<br />Defaults to `NND_DIST_COMP_AUTO` which automatically determines the best dtype for distance computation based on the dataset dimensions.<br />Use `NND_DIST_COMP_FP32` for better precision at the cost of performance and memory usage. This option is only valid when data type is fp32.<br />Use `NND_DIST_COMP_FP16` for better performance and memory usage at the cost of precision. |
 
 <a id="cuvsnndescentindexparamscreate"></a>
 ### cuvsNNDescentIndexParamsCreate
@@ -68,7 +68,7 @@ struct cuvsNNDescentIndexParams {
 Allocate NN-Descent Index params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsNNDescentIndexParamsCreate(cuvsNNDescentIndexParams_t* index_params);
+cuvsError_t cuvsNNDescentIndexParamsCreate(cuvsNNDescentIndexParams_t* index_params);
 ```
 
 **Parameters**
@@ -79,7 +79,7 @@ CUVS_EXPORT cuvsError_t cuvsNNDescentIndexParamsCreate(cuvsNNDescentIndexParams_
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsnndescentindexparamsdestroy"></a>
 ### cuvsNNDescentIndexParamsDestroy
@@ -87,7 +87,7 @@ CUVS_EXPORT cuvsError_t cuvsNNDescentIndexParamsCreate(cuvsNNDescentIndexParams_
 De-allocate NN-Descent Index params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsNNDescentIndexParamsDestroy(cuvsNNDescentIndexParams_t index_params);
+cuvsError_t cuvsNNDescentIndexParamsDestroy(cuvsNNDescentIndexParams_t index_params);
 ```
 
 **Parameters**
@@ -98,7 +98,7 @@ CUVS_EXPORT cuvsError_t cuvsNNDescentIndexParamsDestroy(cuvsNNDescentIndexParams
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## NN-Descent index
 
@@ -127,7 +127,7 @@ typedef struct {
 Allocate NN-Descent index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsNNDescentIndexCreate(cuvsNNDescentIndex_t* index);
+cuvsError_t cuvsNNDescentIndexCreate(cuvsNNDescentIndex_t* index);
 ```
 
 **Parameters**
@@ -138,7 +138,7 @@ CUVS_EXPORT cuvsError_t cuvsNNDescentIndexCreate(cuvsNNDescentIndex_t* index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsnndescentindexdestroy"></a>
 ### cuvsNNDescentIndexDestroy
@@ -146,7 +146,7 @@ CUVS_EXPORT cuvsError_t cuvsNNDescentIndexCreate(cuvsNNDescentIndex_t* index);
 De-allocate NN-Descent index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsNNDescentIndexDestroy(cuvsNNDescentIndex_t index);
+cuvsError_t cuvsNNDescentIndexDestroy(cuvsNNDescentIndex_t index);
 ```
 
 **Parameters**
@@ -157,17 +157,22 @@ CUVS_EXPORT cuvsError_t cuvsNNDescentIndexDestroy(cuvsNNDescentIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## NN-Descent index build
 
 <a id="cuvsnndescentbuild"></a>
 ### cuvsNNDescentBuild
 
-Build a NN-Descent index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`, or `kDLCPU`. Also, acceptable underlying types are: 1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` 2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16` 3. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8` 4. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
+Build an NN-Descent index from a `DLManagedTensor` whose underlying `DLDeviceType` is `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`, or `kDLCPU`. The acceptable underlying data types are:
+
+1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits == 32`
+2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits == 16`
+3. `kDLDataType.code == kDLInt` and `kDLDataType.bits == 8`
+4. `kDLDataType.code == kDLUInt` and `kDLDataType.bits == 8`
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsNNDescentBuild(cuvsResources_t res,
+cuvsError_t cuvsNNDescentBuild(cuvsResources_t res,
 cuvsNNDescentIndexParams_t index_params,
 DLManagedTensor* dataset,
 DLManagedTensor* graph,
@@ -186,4 +191,4 @@ cuvsNNDescentIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
diff --git a/fern/pages/c_api/c-api-neighbors-refine.md b/fern/pages/c_api/c-api-neighbors-refine.md
index eea9d78518..37992ef017 100644
--- a/fern/pages/c_api/c-api-neighbors-refine.md
+++ b/fern/pages/c_api/c-api-neighbors-refine.md
@@ -14,7 +14,7 @@ _Source header: `cuvs/neighbors/refine.h`_
 Refine nearest neighbor search.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsRefine(cuvsResources_t res,
+cuvsError_t cuvsRefine(cuvsResources_t res,
 DLManagedTensor* dataset,
 DLManagedTensor* queries,
 DLManagedTensor* candidates,
@@ -39,4 +39,4 @@ Refinement is an operation that follows an approximate NN search. The approximat
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
diff --git a/fern/pages/c_api/c-api-neighbors-tiered-index.md b/fern/pages/c_api/c-api-neighbors-tiered-index.md
index d7bb28fbf7..79acfc6c23 100644
--- a/fern/pages/c_api/c-api-neighbors-tiered-index.md
+++ b/fern/pages/c_api/c-api-neighbors-tiered-index.md
@@ -58,7 +58,7 @@ typedef struct {
 Allocate Tiered Index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsTieredIndexCreate(cuvsTieredIndex_t* index);
+cuvsError_t cuvsTieredIndexCreate(cuvsTieredIndex_t* index);
 ```
 
 **Parameters**
@@ -69,7 +69,7 @@ CUVS_EXPORT cuvsError_t cuvsTieredIndexCreate(cuvsTieredIndex_t* index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvstieredindexdestroy"></a>
 ### cuvsTieredIndexDestroy
@@ -77,7 +77,7 @@ CUVS_EXPORT cuvsError_t cuvsTieredIndexCreate(cuvsTieredIndex_t* index);
 De-allocate Tiered index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsTieredIndexDestroy(cuvsTieredIndex_t index);
+cuvsError_t cuvsTieredIndexDestroy(cuvsTieredIndex_t index);
 ```
 
 **Parameters**
@@ -88,7 +88,7 @@ CUVS_EXPORT cuvsError_t cuvsTieredIndexDestroy(cuvsTieredIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Tiered Index build parameters
 
@@ -127,7 +127,7 @@ struct cuvsTieredIndexParams {
 Allocate Tiered Index Params and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsTieredIndexParamsCreate(cuvsTieredIndexParams_t* index_params);
+cuvsError_t cuvsTieredIndexParamsCreate(cuvsTieredIndexParams_t* index_params);
 ```
 
 **Parameters**
@@ -138,7 +138,7 @@ CUVS_EXPORT cuvsError_t cuvsTieredIndexParamsCreate(cuvsTieredIndexParams_t* ind
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvstieredindexparamsdestroy"></a>
 ### cuvsTieredIndexParamsDestroy
@@ -146,7 +146,7 @@ CUVS_EXPORT cuvsError_t cuvsTieredIndexParamsCreate(cuvsTieredIndexParams_t* ind
 De-allocate Tiered Index params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsTieredIndexParamsDestroy(cuvsTieredIndexParams_t index_params);
+cuvsError_t cuvsTieredIndexParamsDestroy(cuvsTieredIndexParams_t index_params);
 ```
 
 **Parameters**
@@ -157,17 +157,20 @@ CUVS_EXPORT cuvsError_t cuvsTieredIndexParamsDestroy(cuvsTieredIndexParams_t ind
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Tiered index build
 
 <a id="cuvstieredindexbuild"></a>
 ### cuvsTieredIndexBuild
 
-Build a TieredIndex index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`, or `kDLCPU`. Also, acceptable underlying types are: 1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` 2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16`
+Build a TieredIndex from a `DLManagedTensor` whose underlying `DLDeviceType` is `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`, or `kDLCPU`. The acceptable underlying data types are:
+
+1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits == 32`
+2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits == 16`
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsTieredIndexBuild(cuvsResources_t res,
+cuvsError_t cuvsTieredIndexBuild(cuvsResources_t res,
 cuvsTieredIndexParams_t index_params,
 DLManagedTensor* dataset,
 cuvsTieredIndex_t index);
@@ -184,7 +187,7 @@ cuvsTieredIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Tiered index search
 
@@ -194,7 +197,7 @@ cuvsTieredIndex_t index);
 Search a TieredIndex index with a `DLManagedTensor`
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsTieredIndexSearch(cuvsResources_t res,
+cuvsError_t cuvsTieredIndexSearch(cuvsResources_t res,
 void* search_params,
 cuvsTieredIndex_t index,
 DLManagedTensor* queries,
@@ -217,7 +220,7 @@ cuvsFilter prefilter);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Tiered index extend
 
@@ -227,7 +230,7 @@ cuvsFilter prefilter);
 Extend the index with the new data.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsTieredIndexExtend(cuvsResources_t res,
+cuvsError_t cuvsTieredIndexExtend(cuvsResources_t res,
 DLManagedTensor* new_vectors,
 cuvsTieredIndex_t index);
 ```
@@ -242,7 +245,7 @@ cuvsTieredIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Tiered index merge
 
@@ -252,7 +255,7 @@ cuvsTieredIndex_t index);
 Merge multiple indices together into a single index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsTieredIndexMerge(cuvsResources_t res,
+cuvsError_t cuvsTieredIndexMerge(cuvsResources_t res,
 cuvsTieredIndexParams_t index_params,
 cuvsTieredIndex_t* indices,
 size_t num_indices,
@@ -271,4 +274,4 @@ cuvsTieredIndex_t output_index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
diff --git a/fern/pages/c_api/c-api-neighbors-vamana.md b/fern/pages/c_api/c-api-neighbors-vamana.md
index a0a103dca7..d70db45305 100644
--- a/fern/pages/c_api/c-api-neighbors-vamana.md
+++ b/fern/pages/c_api/c-api-neighbors-vamana.md
@@ -47,7 +47,7 @@ struct cuvsVamanaIndexParams {
 Allocate Vamana Index params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsVamanaIndexParamsCreate(cuvsVamanaIndexParams_t* params);
+cuvsError_t cuvsVamanaIndexParamsCreate(cuvsVamanaIndexParams_t* params);
 ```
 
 **Parameters**
@@ -58,7 +58,7 @@ CUVS_EXPORT cuvsError_t cuvsVamanaIndexParamsCreate(cuvsVamanaIndexParams_t* par
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsvamanaindexparamsdestroy"></a>
 ### cuvsVamanaIndexParamsDestroy
@@ -66,7 +66,7 @@ CUVS_EXPORT cuvsError_t cuvsVamanaIndexParamsCreate(cuvsVamanaIndexParams_t* par
 De-allocate Vamana Index params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsVamanaIndexParamsDestroy(cuvsVamanaIndexParams_t params);
+cuvsError_t cuvsVamanaIndexParamsDestroy(cuvsVamanaIndexParams_t params);
 ```
 
 **Parameters**
@@ -77,7 +77,7 @@ CUVS_EXPORT cuvsError_t cuvsVamanaIndexParamsDestroy(cuvsVamanaIndexParams_t par
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Vamana index
 
@@ -106,7 +106,7 @@ typedef struct {
 Allocate Vamana index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsVamanaIndexCreate(cuvsVamanaIndex_t* index);
+cuvsError_t cuvsVamanaIndexCreate(cuvsVamanaIndex_t* index);
 ```
 
 **Parameters**
@@ -117,7 +117,7 @@ CUVS_EXPORT cuvsError_t cuvsVamanaIndexCreate(cuvsVamanaIndex_t* index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsvamanaindexdestroy"></a>
 ### cuvsVamanaIndexDestroy
@@ -125,7 +125,7 @@ CUVS_EXPORT cuvsError_t cuvsVamanaIndexCreate(cuvsVamanaIndex_t* index);
 De-allocate Vamana index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsVamanaIndexDestroy(cuvsVamanaIndex_t index);
+cuvsError_t cuvsVamanaIndexDestroy(cuvsVamanaIndex_t index);
 ```
 
 **Parameters**
@@ -136,7 +136,7 @@ CUVS_EXPORT cuvsError_t cuvsVamanaIndexDestroy(cuvsVamanaIndex_t index);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsvamanaindexgetdims"></a>
 ### cuvsVamanaIndexGetDims
@@ -144,7 +144,7 @@ CUVS_EXPORT cuvsError_t cuvsVamanaIndexDestroy(cuvsVamanaIndex_t index);
 Get the dimension of the index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsVamanaIndexGetDims(cuvsVamanaIndex_t index, int* dim);
+cuvsError_t cuvsVamanaIndexGetDims(cuvsVamanaIndex_t index, int* dim);
 ```
 
 **Parameters**
@@ -156,7 +156,7 @@ CUVS_EXPORT cuvsError_t cuvsVamanaIndexGetDims(cuvsVamanaIndex_t index, int* dim
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Vamana index build
 
@@ -166,7 +166,7 @@ CUVS_EXPORT cuvsError_t cuvsVamanaIndexGetDims(cuvsVamanaIndex_t index, int* dim
 Build Vamana index
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsVamanaBuild(cuvsResources_t res,
+cuvsError_t cuvsVamanaBuild(cuvsResources_t res,
 cuvsVamanaIndexParams_t params,
 DLManagedTensor* dataset,
 cuvsVamanaIndex_t index);
@@ -193,7 +193,7 @@ Usage example:
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 ## Vamana index serialize
 
@@ -203,7 +203,7 @@ Usage example:
 Save Vamana index to file
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsVamanaSerialize(cuvsResources_t res,
+cuvsError_t cuvsVamanaSerialize(cuvsResources_t res,
 const char* filename,
 cuvsVamanaIndex_t index,
 bool include_dataset);
@@ -224,4 +224,4 @@ Serialized Index is to be used by the DiskANN open-source repository for graph s
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
diff --git a/fern/pages/c_api/c-api-preprocessing-pca.md b/fern/pages/c_api/c-api-preprocessing-pca.md
index 6900395af4..4bfb7d7b51 100644
--- a/fern/pages/c_api/c-api-preprocessing-pca.md
+++ b/fern/pages/c_api/c-api-preprocessing-pca.md
@@ -60,7 +60,7 @@ struct cuvsPcaParams {
 Allocate PCA params and populate with default values.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsPcaParamsCreate(cuvsPcaParams_t* params);
+cuvsError_t cuvsPcaParamsCreate(cuvsPcaParams_t* params);
 ```
 
 **Parameters**
@@ -71,7 +71,7 @@ CUVS_EXPORT cuvsError_t cuvsPcaParamsCreate(cuvsPcaParams_t* params);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvspcaparamsdestroy"></a>
 ### cuvsPcaParamsDestroy
@@ -79,7 +79,7 @@ CUVS_EXPORT cuvsError_t cuvsPcaParamsCreate(cuvsPcaParams_t* params);
 De-allocate PCA params.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsPcaParamsDestroy(cuvsPcaParams_t params);
+cuvsError_t cuvsPcaParamsDestroy(cuvsPcaParams_t params);
 ```
 
 **Parameters**
@@ -90,7 +90,7 @@ CUVS_EXPORT cuvsError_t cuvsPcaParamsDestroy(cuvsPcaParams_t params);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvspcafit"></a>
 ### cuvsPcaFit
@@ -98,7 +98,7 @@ CUVS_EXPORT cuvsError_t cuvsPcaParamsDestroy(cuvsPcaParams_t params);
 Perform PCA fit operation.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsPcaFit(cuvsResources_t res,
+cuvsError_t cuvsPcaFit(cuvsResources_t res,
 cuvsPcaParams_t params,
 DLManagedTensor* input,
 DLManagedTensor* components,
@@ -129,7 +129,7 @@ Computes the principal components, explained variances, singular values, and col
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvspcafittransform"></a>
 ### cuvsPcaFitTransform
@@ -137,7 +137,7 @@ Computes the principal components, explained variances, singular values, and col
 Perform PCA fit and transform in a single operation.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsPcaFitTransform(cuvsResources_t res,
+cuvsError_t cuvsPcaFitTransform(cuvsResources_t res,
 cuvsPcaParams_t params,
 DLManagedTensor* input,
 DLManagedTensor* trans_input,
@@ -170,7 +170,7 @@ Computes the principal components and transforms the input data into the eigensp
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvspcatransform"></a>
 ### cuvsPcaTransform
@@ -178,7 +178,7 @@ Computes the principal components and transforms the input data into the eigensp
 Perform PCA transform operation.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsPcaTransform(cuvsResources_t res,
+cuvsError_t cuvsPcaTransform(cuvsResources_t res,
 cuvsPcaParams_t params,
 DLManagedTensor* input,
 DLManagedTensor* components,
@@ -203,7 +203,7 @@ Transforms the input data into the eigenspace using previously computed principa
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvspcainversetransform"></a>
 ### cuvsPcaInverseTransform
@@ -211,7 +211,7 @@ Transforms the input data into the eigenspace using previously computed principa
 Perform PCA inverse transform operation.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsPcaInverseTransform(cuvsResources_t res,
+cuvsError_t cuvsPcaInverseTransform(cuvsResources_t res,
 cuvsPcaParams_t params,
 DLManagedTensor* trans_input,
 DLManagedTensor* components,
@@ -236,4 +236,4 @@ Transforms data from the eigenspace back to the original space.
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
diff --git a/fern/pages/c_api/c-api-preprocessing-quantize-binary.md b/fern/pages/c_api/c-api-preprocessing-quantize-binary.md
index b5f57e3f3f..2d3dd89fac 100644
--- a/fern/pages/c_api/c-api-preprocessing-quantize-binary.md
+++ b/fern/pages/c_api/c-api-preprocessing-quantize-binary.md
@@ -54,7 +54,7 @@ struct cuvsBinaryQuantizerParams {
 Allocate Binary Quantizer params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsBinaryQuantizerParamsCreate(cuvsBinaryQuantizerParams_t* params);
+cuvsError_t cuvsBinaryQuantizerParamsCreate(cuvsBinaryQuantizerParams_t* params);
 ```
 
 **Parameters**
@@ -65,7 +65,7 @@ CUVS_EXPORT cuvsError_t cuvsBinaryQuantizerParamsCreate(cuvsBinaryQuantizerParam
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsbinaryquantizerparamsdestroy"></a>
 ### cuvsBinaryQuantizerParamsDestroy
@@ -73,7 +73,7 @@ CUVS_EXPORT cuvsError_t cuvsBinaryQuantizerParamsCreate(cuvsBinaryQuantizerParam
 De-allocate Binary Quantizer params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsBinaryQuantizerParamsDestroy(cuvsBinaryQuantizerParams_t params);
+cuvsError_t cuvsBinaryQuantizerParamsDestroy(cuvsBinaryQuantizerParams_t params);
 ```
 
 **Parameters**
@@ -84,7 +84,7 @@ CUVS_EXPORT cuvsError_t cuvsBinaryQuantizerParamsDestroy(cuvsBinaryQuantizerPara
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsbinaryquantizer"></a>
 ### cuvsBinaryQuantizer
@@ -113,7 +113,7 @@ typedef struct {
 Allocate Binary Quantizer and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsBinaryQuantizerCreate(cuvsBinaryQuantizer_t* quantizer);
+cuvsError_t cuvsBinaryQuantizerCreate(cuvsBinaryQuantizer_t* quantizer);
 ```
 
 **Parameters**
@@ -124,7 +124,7 @@ CUVS_EXPORT cuvsError_t cuvsBinaryQuantizerCreate(cuvsBinaryQuantizer_t* quantiz
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsbinaryquantizerdestroy"></a>
 ### cuvsBinaryQuantizerDestroy
@@ -132,7 +132,7 @@ CUVS_EXPORT cuvsError_t cuvsBinaryQuantizerCreate(cuvsBinaryQuantizer_t* quantiz
 De-allocate Binary Quantizer
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsBinaryQuantizerDestroy(cuvsBinaryQuantizer_t quantizer);
+cuvsError_t cuvsBinaryQuantizerDestroy(cuvsBinaryQuantizer_t quantizer);
 ```
 
 **Parameters**
@@ -143,7 +143,7 @@ CUVS_EXPORT cuvsError_t cuvsBinaryQuantizerDestroy(cuvsBinaryQuantizer_t quantiz
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsbinaryquantizertrain"></a>
 ### cuvsBinaryQuantizerTrain
@@ -151,7 +151,7 @@ CUVS_EXPORT cuvsError_t cuvsBinaryQuantizerDestroy(cuvsBinaryQuantizer_t quantiz
 Trains a binary quantizer to be used later for quantizing the dataset.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsBinaryQuantizerTrain(cuvsResources_t res,
+cuvsError_t cuvsBinaryQuantizerTrain(cuvsResources_t res,
 cuvsBinaryQuantizerParams_t params,
 DLManagedTensor* dataset,
 cuvsBinaryQuantizer_t quantizer);
@@ -168,7 +168,7 @@ cuvsBinaryQuantizer_t quantizer);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsbinaryquantizertransform"></a>
 ### cuvsBinaryQuantizerTransform
@@ -176,7 +176,7 @@ cuvsBinaryQuantizer_t quantizer);
 Applies binary quantization transform to the given dataset
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsBinaryQuantizerTransform(cuvsResources_t res,
+cuvsError_t cuvsBinaryQuantizerTransform(cuvsResources_t res,
 DLManagedTensor* dataset,
 DLManagedTensor* out);
 ```
@@ -193,7 +193,7 @@ This applies binary quantization to a dataset, changing any positive values to a
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsbinaryquantizertransformwithparams"></a>
 ### cuvsBinaryQuantizerTransformWithParams
@@ -201,7 +201,7 @@ This applies binary quantization to a dataset, changing any positive values to a
 Applies binary quantization transform to the given dataset
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsBinaryQuantizerTransformWithParams(cuvsResources_t res,
+cuvsError_t cuvsBinaryQuantizerTransformWithParams(cuvsResources_t res,
 cuvsBinaryQuantizer_t quantizer,
 DLManagedTensor* dataset,
 DLManagedTensor* out);
@@ -220,4 +220,4 @@ This applies binary quantization to a dataset, changing any values that are larg
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
diff --git a/fern/pages/c_api/c-api-preprocessing-quantize-pq.md b/fern/pages/c_api/c-api-preprocessing-quantize-pq.md
index 43bfbdbc86..47287711bb 100644
--- a/fern/pages/c_api/c-api-preprocessing-quantize-pq.md
+++ b/fern/pages/c_api/c-api-preprocessing-quantize-pq.md
@@ -47,7 +47,7 @@ struct cuvsProductQuantizerParams {
 Allocate Product Quantizer params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsProductQuantizerParamsCreate(cuvsProductQuantizerParams_t* params);
+cuvsError_t cuvsProductQuantizerParamsCreate(cuvsProductQuantizerParams_t* params);
 ```
 
 **Parameters**
@@ -58,7 +58,7 @@ CUVS_EXPORT cuvsError_t cuvsProductQuantizerParamsCreate(cuvsProductQuantizerPar
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsproductquantizerparamsdestroy"></a>
 ### cuvsProductQuantizerParamsDestroy
@@ -66,7 +66,7 @@ CUVS_EXPORT cuvsError_t cuvsProductQuantizerParamsCreate(cuvsProductQuantizerPar
 De-allocate Product Quantizer params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsProductQuantizerParamsDestroy(cuvsProductQuantizerParams_t params);
+cuvsError_t cuvsProductQuantizerParamsDestroy(cuvsProductQuantizerParams_t params);
 ```
 
 **Parameters**
@@ -77,7 +77,7 @@ CUVS_EXPORT cuvsError_t cuvsProductQuantizerParamsDestroy(cuvsProductQuantizerPa
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsproductquantizer"></a>
 ### cuvsProductQuantizer
@@ -106,7 +106,7 @@ typedef struct {
 Allocate Product Quantizer
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsProductQuantizerCreate(cuvsProductQuantizer_t* quantizer);
+cuvsError_t cuvsProductQuantizerCreate(cuvsProductQuantizer_t* quantizer);
 ```
 
 **Parameters**
@@ -117,7 +117,7 @@ CUVS_EXPORT cuvsError_t cuvsProductQuantizerCreate(cuvsProductQuantizer_t* quant
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsproductquantizerdestroy"></a>
 ### cuvsProductQuantizerDestroy
@@ -125,7 +125,7 @@ CUVS_EXPORT cuvsError_t cuvsProductQuantizerCreate(cuvsProductQuantizer_t* quant
 De-allocate Product Quantizer
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsProductQuantizerDestroy(cuvsProductQuantizer_t quantizer);
+cuvsError_t cuvsProductQuantizerDestroy(cuvsProductQuantizer_t quantizer);
 ```
 
 **Parameters**
@@ -136,7 +136,7 @@ CUVS_EXPORT cuvsError_t cuvsProductQuantizerDestroy(cuvsProductQuantizer_t quant
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsproductquantizerbuild"></a>
 ### cuvsProductQuantizerBuild
@@ -144,7 +144,7 @@ CUVS_EXPORT cuvsError_t cuvsProductQuantizerDestroy(cuvsProductQuantizer_t quant
 Builds a product quantizer to be used later for quantizing the dataset.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsProductQuantizerBuild(cuvsResources_t res,
+cuvsError_t cuvsProductQuantizerBuild(cuvsResources_t res,
 cuvsProductQuantizerParams_t params,
 DLManagedTensor* dataset,
 cuvsProductQuantizer_t quantizer);
@@ -161,7 +161,7 @@ cuvsProductQuantizer_t quantizer);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsproductquantizertransform"></a>
 ### cuvsProductQuantizerTransform
@@ -169,7 +169,7 @@ cuvsProductQuantizer_t quantizer);
 Applies product quantization transform to the given dataset
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsProductQuantizerTransform(cuvsResources_t res,
+cuvsError_t cuvsProductQuantizerTransform(cuvsResources_t res,
 cuvsProductQuantizer_t quantizer,
 DLManagedTensor* dataset,
 DLManagedTensor* codes_out,
@@ -190,7 +190,7 @@ This applies product quantization to a dataset.
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsproductquantizerinversetransform"></a>
 ### cuvsProductQuantizerInverseTransform
@@ -198,7 +198,7 @@ This applies product quantization to a dataset.
 Applies product quantization inverse transform to the given quantized codes
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsProductQuantizerInverseTransform(cuvsResources_t res,
+cuvsError_t cuvsProductQuantizerInverseTransform(cuvsResources_t res,
 cuvsProductQuantizer_t quantizer,
 DLManagedTensor* pq_codes,
 DLManagedTensor* out,
@@ -219,7 +219,7 @@ This applies product quantization inverse transform to the given quantized codes
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsproductquantizergetpqbits"></a>
 ### cuvsProductQuantizerGetPqBits
@@ -227,7 +227,7 @@ This applies product quantization inverse transform to the given quantized codes
 Get the bit length of the vector element after compression by PQ.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsProductQuantizerGetPqBits(cuvsProductQuantizer_t quantizer, uint32_t* pq_bits);
+cuvsError_t cuvsProductQuantizerGetPqBits(cuvsProductQuantizer_t quantizer, uint32_t* pq_bits);
 ```
 
 **Parameters**
@@ -239,7 +239,7 @@ CUVS_EXPORT cuvsError_t cuvsProductQuantizerGetPqBits(cuvsProductQuantizer_t qua
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsproductquantizergetpqdim"></a>
 ### cuvsProductQuantizerGetPqDim
@@ -247,7 +247,7 @@ CUVS_EXPORT cuvsError_t cuvsProductQuantizerGetPqBits(cuvsProductQuantizer_t qua
 Get the dimensionality of the vector after compression by PQ.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsProductQuantizerGetPqDim(cuvsProductQuantizer_t quantizer, uint32_t* pq_dim);
+cuvsError_t cuvsProductQuantizerGetPqDim(cuvsProductQuantizer_t quantizer, uint32_t* pq_dim);
 ```
 
 **Parameters**
@@ -259,7 +259,7 @@ CUVS_EXPORT cuvsError_t cuvsProductQuantizerGetPqDim(cuvsProductQuantizer_t quan
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsproductquantizergetpqcodebook"></a>
 ### cuvsProductQuantizerGetPqCodebook
@@ -267,7 +267,7 @@ CUVS_EXPORT cuvsError_t cuvsProductQuantizerGetPqDim(cuvsProductQuantizer_t quan
 Get the PQ codebook.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsProductQuantizerGetPqCodebook(cuvsProductQuantizer_t quantizer,
+cuvsError_t cuvsProductQuantizerGetPqCodebook(cuvsProductQuantizer_t quantizer,
 DLManagedTensor* pq_codebook);
 ```
 
@@ -280,7 +280,7 @@ DLManagedTensor* pq_codebook);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsproductquantizergetvqcodebook"></a>
 ### cuvsProductQuantizerGetVqCodebook
@@ -288,7 +288,7 @@ DLManagedTensor* pq_codebook);
 Get the VQ codebook.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsProductQuantizerGetVqCodebook(cuvsProductQuantizer_t quantizer,
+cuvsError_t cuvsProductQuantizerGetVqCodebook(cuvsProductQuantizer_t quantizer,
 DLManagedTensor* vq_codebook);
 ```
 
@@ -301,7 +301,7 @@ DLManagedTensor* vq_codebook);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsproductquantizergetencodeddim"></a>
 ### cuvsProductQuantizerGetEncodedDim
@@ -309,7 +309,7 @@ DLManagedTensor* vq_codebook);
 Get the encoded dimension of the quantized dataset.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsProductQuantizerGetEncodedDim(cuvsProductQuantizer_t quantizer,
+cuvsError_t cuvsProductQuantizerGetEncodedDim(cuvsProductQuantizer_t quantizer,
 uint32_t* encoded_dim);
 ```
 
@@ -322,7 +322,7 @@ uint32_t* encoded_dim);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsproductquantizergetusevq"></a>
 ### cuvsProductQuantizerGetUseVq
@@ -330,7 +330,7 @@ uint32_t* encoded_dim);
 Get whether VQ is used.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsProductQuantizerGetUseVq(cuvsProductQuantizer_t quantizer, bool* use_vq);
+cuvsError_t cuvsProductQuantizerGetUseVq(cuvsProductQuantizer_t quantizer, bool* use_vq);
 ```
 
 **Parameters**
@@ -342,4 +342,4 @@ CUVS_EXPORT cuvsError_t cuvsProductQuantizerGetUseVq(cuvsProductQuantizer_t quan
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
diff --git a/fern/pages/c_api/c-api-preprocessing-quantize-scalar.md b/fern/pages/c_api/c-api-preprocessing-quantize-scalar.md
index 23d7d23211..86aacd7bce 100644
--- a/fern/pages/c_api/c-api-preprocessing-quantize-scalar.md
+++ b/fern/pages/c_api/c-api-preprocessing-quantize-scalar.md
@@ -23,7 +23,7 @@ struct cuvsScalarQuantizerParams;
 Allocate Scalar Quantizer params, and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsScalarQuantizerParamsCreate(cuvsScalarQuantizerParams_t* params);
+cuvsError_t cuvsScalarQuantizerParamsCreate(cuvsScalarQuantizerParams_t* params);
 ```
 
 **Parameters**
@@ -34,7 +34,7 @@ CUVS_EXPORT cuvsError_t cuvsScalarQuantizerParamsCreate(cuvsScalarQuantizerParam
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsscalarquantizerparamsdestroy"></a>
 ### cuvsScalarQuantizerParamsDestroy
@@ -42,7 +42,7 @@ CUVS_EXPORT cuvsError_t cuvsScalarQuantizerParamsCreate(cuvsScalarQuantizerParam
 De-allocate Scalar Quantizer params
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsScalarQuantizerParamsDestroy(cuvsScalarQuantizerParams_t params);
+cuvsError_t cuvsScalarQuantizerParamsDestroy(cuvsScalarQuantizerParams_t params);
 ```
 
 **Parameters**
@@ -53,7 +53,7 @@ CUVS_EXPORT cuvsError_t cuvsScalarQuantizerParamsDestroy(cuvsScalarQuantizerPara
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsscalarquantizer"></a>
 ### cuvsScalarQuantizer
@@ -82,7 +82,7 @@ typedef struct {
 Allocate Scalar Quantizer and populate with default values
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsScalarQuantizerCreate(cuvsScalarQuantizer_t* quantizer);
+cuvsError_t cuvsScalarQuantizerCreate(cuvsScalarQuantizer_t* quantizer);
 ```
 
 **Parameters**
@@ -93,7 +93,7 @@ CUVS_EXPORT cuvsError_t cuvsScalarQuantizerCreate(cuvsScalarQuantizer_t* quantiz
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsscalarquantizerdestroy"></a>
 ### cuvsScalarQuantizerDestroy
@@ -101,7 +101,7 @@ CUVS_EXPORT cuvsError_t cuvsScalarQuantizerCreate(cuvsScalarQuantizer_t* quantiz
 De-allocate Scalar Quantizer
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsScalarQuantizerDestroy(cuvsScalarQuantizer_t quantizer);
+cuvsError_t cuvsScalarQuantizerDestroy(cuvsScalarQuantizer_t quantizer);
 ```
 
 **Parameters**
@@ -112,7 +112,7 @@ CUVS_EXPORT cuvsError_t cuvsScalarQuantizerDestroy(cuvsScalarQuantizer_t quantiz
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsscalarquantizertrain"></a>
 ### cuvsScalarQuantizerTrain
@@ -120,7 +120,7 @@ CUVS_EXPORT cuvsError_t cuvsScalarQuantizerDestroy(cuvsScalarQuantizer_t quantiz
 Trains a scalar quantizer to be used later for quantizing the dataset.
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsScalarQuantizerTrain(cuvsResources_t res,
+cuvsError_t cuvsScalarQuantizerTrain(cuvsResources_t res,
 cuvsScalarQuantizerParams_t params,
 DLManagedTensor* dataset,
 cuvsScalarQuantizer_t quantizer);
@@ -137,7 +137,7 @@ cuvsScalarQuantizer_t quantizer);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsscalarquantizertransform"></a>
 ### cuvsScalarQuantizerTransform
@@ -145,7 +145,7 @@ cuvsScalarQuantizer_t quantizer);
 Applies quantization transform to given dataset
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsScalarQuantizerTransform(cuvsResources_t res,
+cuvsError_t cuvsScalarQuantizerTransform(cuvsResources_t res,
 cuvsScalarQuantizer_t quantizer,
 DLManagedTensor* dataset,
 DLManagedTensor* out);
@@ -162,7 +162,7 @@ DLManagedTensor* out);
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
 <a id="cuvsscalarquantizerinversetransform"></a>
 ### cuvsScalarQuantizerInverseTransform
@@ -170,7 +170,7 @@ DLManagedTensor* out);
 Perform inverse quantization step on previously quantized dataset
 
 ```c
-CUVS_EXPORT cuvsError_t cuvsScalarQuantizerInverseTransform(cuvsResources_t res,
+cuvsError_t cuvsScalarQuantizerInverseTransform(cuvsResources_t res,
 cuvsScalarQuantizer_t quantizer,
 DLManagedTensor* dataset,
 DLManagedTensor* out);
@@ -189,4 +189,4 @@ Note that depending on the chosen data types train dataset the conversion is not
 
 **Returns**
 
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
+[`cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
diff --git a/fern/pages/cpp_api/cpp-api-cluster-agglomerative.md b/fern/pages/cpp_api/cpp-api-cluster-agglomerative.md
index e15a598c37..f31edd72a3 100644
--- a/fern/pages/cpp_api/cpp-api-cluster-agglomerative.md
+++ b/fern/pages/cpp_api/cpp-api-cluster-agglomerative.md
@@ -72,8 +72,8 @@ scale the algorithm beyond the n^2 memory consumption of implementations that us
 | `labels` | out | `raft::device_vector_view<int, int>` | output labels vector (size n_rows) |
 | `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use when constructing connectivities graph |
 | `n_clusters` | in | `size_t` | number of clusters to assign data samples |
-| `linkage` | in | [`cuvs::cluster::agglomerative::Linkage`](/api-reference/cpp-api-cluster-agglomerative#cluster-agglomerative-linkage) | strategy for constructing the linkage. PAIRWISE uses more memory but can be faster for smaller datasets. KNN_GRAPH allows the memory usage to be controlled (using parameter c) at the expense of potentially additional minimum spanning tree iterations. Default: `cuvs::cluster::agglomerative::Linkage::KNN_GRAPH`. |
-| `c` | in | `std::optional<int>` | a constant used when constructing linkage from knn graph. Allows the indirect control of k. The algorithm will set `k = log(n) + c` Default: `std::make_optional&lt;int&gt;(DEFAULT_CONST_C)`. |
+| `linkage` | in | [`cuvs::cluster::agglomerative::Linkage`](/api-reference/cpp-api-cluster-agglomerative#cluster-agglomerative-linkage) | strategy for constructing the linkage. PAIRWISE uses more memory but can be faster for smaller datasets. KNN_GRAPH allows the memory usage to be controlled (using parameter c) at the expense of potentially additional minimum spanning tree iterations.<br />Default: `cuvs::cluster::agglomerative::Linkage::KNN_GRAPH`. |
+| `c` | in | `std::optional<int>` | a constant used when constructing linkage from knn graph. Allows the indirect control of k. The algorithm will set `k = log(n) + c`.<br />Default: `std::make_optional<int>(DEFAULT_CONST_C)`. |
 
 **Returns**
 
diff --git a/fern/pages/cpp_api/cpp-api-cluster-kmeans.md b/fern/pages/cpp_api/cpp-api-cluster-kmeans.md
index 5e5e473c44..cbb5a73de7 100644
--- a/fern/pages/cpp_api/cpp-api-cluster-kmeans.md
+++ b/fern/pages/cpp_api/cpp-api-cluster-kmeans.md
@@ -61,10 +61,10 @@ struct params : base_params {
 | `rng_state` | `raft::random::RngState` | Seed to the random number generator. |
 | `n_init` | `int` | Number of instance k-means algorithm will be run with different seeds. |
 | `oversampling_factor` | `double` | Oversampling factor for use in the k-means\|\| algorithm |
-| `batch_samples` | `int` | batch_samples and batch_centroids are used to tile 1NN computation which is useful to optimize/control the memory footprint Default tile is [batch_samples x n_clusters] i.e. when batch_centroids is 0 then don't tile the centroids<br /><br />NB: These parameters are unrelated to streaming_batch_size, which controls how many samples to transfer from host to device per batch when processing out-of-core data. |
+| `batch_samples` | `int` | batch_samples and batch_centroids are used to tile the 1NN computation, which is useful to optimize/control the memory footprint.<br />The default tile is [batch_samples x n_clusters], i.e. when batch_centroids is 0 the centroids are not tiled.<br /><br />NB: These parameters are unrelated to streaming_batch_size, which controls how many samples to transfer from host to device per batch when processing out-of-core data. |
 | `batch_centroids` | `int` | if 0 then batch_centroids = n_clusters |
 | `init_size` | `int64_t` | Number of samples to randomly draw for the KMeansPlusPlus initialization step. A random subset of this size is used for centroid seeding.<br /><br />Only applies when dataset is on host; for device data the full dataset is always used for seeding and this parameter is ignored.<br /><br />When set to 0 (default) with host data uses `min(3 * n_clusters, n_samples)` as a default.<br /><br />In Batched multi-GPU host-data fits, the effective KMeansPlusPlus initialization sample is materialized on device on every rank. Every rank must have enough GPU memory for this sample, and rank 0 must also have enough GPU memory for the seeding workspace.<br /><br />Default: 0. |
-| `streaming_batch_size` | `int64_t` | Number of samples to process per GPU batch when fitting with host data. When set to 0, defaults to n_samples (process all at once). Only used by the batched (host-data) code path and ignored by device-data overloads.<br /><br />In multi-GPU mode, this is a per-rank batch size. Each rank processes up to this many local samples per batch, clamped to that rank's local sample count. Default: 0 (process all data at once). |
+| `streaming_batch_size` | `int64_t` | Number of samples to process per GPU batch when fitting with host data. When set to 0, defaults to n_samples (process all at once). Only used by the batched (host-data) code path and ignored by device-data overloads.<br /><br />In multi-GPU mode, this is a per-rank batch size. Each rank processes up to this many local samples per batch, clamped to that rank's local sample count.<br />Default: 0 (process all data at once). |
 
 <a id="cluster-kmeans-balanced-params"></a>
 ### cluster::kmeans::balanced_params
@@ -354,7 +354,7 @@ std::optional<raft::host_scalar_view<float>> inertia = std::nullopt);
 | `params` | in | [`cuvs::cluster::kmeans::balanced_params const&`](/api-reference/cpp-api-cluster-kmeans#cluster-kmeans-balanced-params) | Parameters for KMeans model. |
 | `X` | in | `raft::device_matrix_view<const float, int64_t>` | Training instances to cluster. The data must be in row-major format. [dim = n_samples x n_features] |
 | `centroids` | out | `raft::device_matrix_view<float, int64_t>` | [out] The generated centroids from the kmeans algorithm are stored at the address pointed by 'centroids'. [dim = n_clusters x n_features] |
-| `inertia` | out | `std::optional<raft::host_scalar_view<float>>` | Sum of squared distances of samples to their closest cluster center. Default: `std::nullopt`. |
+| `inertia` | out | `std::optional<raft::host_scalar_view<float>>` | Sum of squared distances of samples to their closest cluster center.<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -380,7 +380,7 @@ std::optional<raft::host_scalar_view<float>> inertia = std::nullopt);
 | `params` | in | [`cuvs::cluster::kmeans::balanced_params const&`](/api-reference/cpp-api-cluster-kmeans#cluster-kmeans-balanced-params) | Parameters for KMeans model. |
 | `X` | in | `raft::device_matrix_view<const int8_t, int64_t>` | Training instances to cluster. The data must be in row-major format. [dim = n_samples x n_features] |
 | `centroids` | inout | `raft::device_matrix_view<float, int64_t>` | [out] The generated centroids from the kmeans algorithm are stored at the address pointed by 'centroids'. [dim = n_clusters x n_features] |
-| `inertia` | out | `std::optional<raft::host_scalar_view<float>>` | Sum of squared distances of samples to their closest cluster center. Default: `std::nullopt`. |
+| `inertia` | out | `std::optional<raft::host_scalar_view<float>>` | Sum of squared distances of samples to their closest cluster center.<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -406,7 +406,7 @@ std::optional<raft::host_scalar_view<float>> inertia = std::nullopt);
 | `params` | in | [`cuvs::cluster::kmeans::balanced_params const&`](/api-reference/cpp-api-cluster-kmeans#cluster-kmeans-balanced-params) | Parameters for KMeans model. |
 | `X` | in | `raft::device_matrix_view<const half, int64_t>` | Training instances to cluster. The data must be in row-major format. [dim = n_samples x n_features] |
 | `centroids` | inout | `raft::device_matrix_view<float, int64_t>` | [out] The generated centroids from the kmeans algorithm are stored at the address pointed by 'centroids'. [dim = n_clusters x n_features] |
-| `inertia` | out | `std::optional<raft::host_scalar_view<float>>` | Sum of squared distances of samples to their closest cluster center. Default: `std::nullopt`. |
+| `inertia` | out | `std::optional<raft::host_scalar_view<float>>` | Sum of squared distances of samples to their closest cluster center.<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -432,7 +432,7 @@ std::optional<raft::host_scalar_view<float>> inertia = std::nullopt);
 | `params` | in | [`cuvs::cluster::kmeans::balanced_params const&`](/api-reference/cpp-api-cluster-kmeans#cluster-kmeans-balanced-params) | Parameters for KMeans model. |
 | `X` | in | `raft::device_matrix_view<const uint8_t, int64_t>` | Training instances to cluster. The data must be in row-major format. [dim = n_samples x n_features] |
 | `centroids` | inout | `raft::device_matrix_view<float, int64_t>` | [out] The generated centroids from the kmeans algorithm are stored at the address pointed by 'centroids'. [dim = n_clusters x n_features] |
-| `inertia` | out | `std::optional<raft::host_scalar_view<float>>` | Sum of squared distances of samples to their closest cluster center. Default: `std::nullopt`. |
+| `inertia` | out | `std::optional<raft::host_scalar_view<float>>` | Sum of squared distances of samples to their closest cluster center.<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -979,7 +979,7 @@ std::optional<raft::device_vector_view<const float, int>> sample_weight = std::n
 | `X` | in | `raft::device_matrix_view<const float, int>` | Training instances to cluster. The data must be in row-major format. [dim = n_samples x n_features] |
 | `centroids` | in | `raft::device_matrix_view<const float, int>` | Cluster centroids. The data must be in row-major format. [dim = n_clusters x n_features] |
 | `cost` | out | `raft::host_scalar_view<float>` | Resulting cluster cost |
-| `sample_weight` | in | `std::optional<raft::device_vector_view<const float, int>>` | Optional per-sample weights. [len = n_samples] Default: `std::nullopt`. |
+| `sample_weight` | in | `std::optional<raft::device_vector_view<const float, int>>` | Optional per-sample weights. [len = n_samples]<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -1006,7 +1006,7 @@ std::optional<raft::device_vector_view<const double, int>> sample_weight = std::
 | `X` | in | `raft::device_matrix_view<const double, int>` | Training instances to cluster. The data must be in row-major format. [dim = n_samples x n_features] |
 | `centroids` | in | `raft::device_matrix_view<const double, int>` | Cluster centroids. The data must be in row-major format. [dim = n_clusters x n_features] |
 | `cost` | out | `raft::host_scalar_view<double>` | Resulting cluster cost |
-| `sample_weight` | in | `std::optional<raft::device_vector_view<const double, int>>` | Optional per-sample weights. [len = n_samples] Default: `std::nullopt`. |
+| `sample_weight` | in | `std::optional<raft::device_vector_view<const double, int>>` | Optional per-sample weights. [len = n_samples]<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -1033,7 +1033,7 @@ std::optional<raft::device_vector_view<const float, int64_t>> sample_weight = st
 | `X` | in | `raft::device_matrix_view<const float, int64_t>` | Training instances to cluster. The data must be in row-major format. [dim = n_samples x n_features] |
 | `centroids` | in | `raft::device_matrix_view<const float, int64_t>` | Cluster centroids. The data must be in row-major format. [dim = n_clusters x n_features] |
 | `cost` | out | `raft::host_scalar_view<float>` | Resulting cluster cost |
-| `sample_weight` | in | `std::optional<raft::device_vector_view<const float, int64_t>>` | Optional per-sample weights. [len = n_samples] Default: `std::nullopt`. |
+| `sample_weight` | in | `std::optional<raft::device_vector_view<const float, int64_t>>` | Optional per-sample weights. [len = n_samples]<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -1060,7 +1060,7 @@ std::optional<raft::device_vector_view<const double, int64_t>> sample_weight = s
 | `X` | in | `raft::device_matrix_view<const double, int64_t>` | Training instances to cluster. The data must be in row-major format. [dim = n_samples x n_features] |
 | `centroids` | in | `raft::device_matrix_view<const double, int64_t>` | Cluster centroids. The data must be in row-major format. [dim = n_clusters x n_features] |
 | `cost` | out | `raft::host_scalar_view<double>` | Resulting cluster cost |
-| `sample_weight` | in | `std::optional<raft::device_vector_view<const double, int64_t>>` | Optional per-sample weights. [len = n_samples] Default: `std::nullopt`. |
+| `sample_weight` | in | `std::optional<raft::device_vector_view<const double, int64_t>>` | Optional per-sample weights. [len = n_samples]<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -1095,9 +1095,9 @@ float tol   = 1e-3);
 | `inertia` |  | `raft::host_scalar_view<float>` | inertia of best k found |
 | `n_iter` |  | `raft::host_scalar_view<int>` | number of iterations used to find best k |
 | `kmax` |  | `int` | maximum k to try in search |
-| `kmin` |  | `int` | minimum k to try in search (should be &gt;= 1) Default: `1`. |
-| `maxiter` |  | `int` | maximum number of iterations to run Default: `100`. |
-| `tol` |  | `float` | tolerance for early stopping convergence Default: `1e-3`. |
+| `kmin` |  | `int` | minimum k to try in search (should be &gt;= 1)<br />Default: `1`. |
+| `maxiter` |  | `int` | maximum number of iterations to run<br />Default: `100`. |
+| `tol` |  | `float` | tolerance for early stopping convergence<br />Default: `1e-3`. |
 
 **Returns**
 
diff --git a/fern/pages/cpp_api/cpp-api-distance-distance.md b/fern/pages/cpp_api/cpp-api-distance-distance.md
index 0bf215165f..e399d10e2f 100644
--- a/fern/pages/cpp_api/cpp-api-distance-distance.md
+++ b/fern/pages/cpp_api/cpp-api-distance-distance.md
@@ -154,7 +154,7 @@ Usage example:
 | `y` | in | `raft::device_matrix_view<const float, std::int64_t, raft::layout_c_contiguous> const` | second set of points (size m*k) |
 | `dist` | out | `raft::device_matrix_view<float, std::int64_t, raft::layout_c_contiguous>` | output distance matrix (size n*m) |
 | `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance to evaluate |
-| `metric_arg` | in | `float` | metric argument (used for Minkowski distance) Default: `2.0f`. |
+| `metric_arg` | in | `float` | metric argument (used for Minkowski distance)<br />Default: `2.0f`. |
 
 **Returns**
 
@@ -187,7 +187,7 @@ Usage example:
 | `y` | in | `raft::device_matrix_view<const double, std::int64_t, raft::layout_c_contiguous> const` | second set of points (size m*k) |
 | `dist` | out | `raft::device_matrix_view<double, std::int64_t, raft::layout_c_contiguous>` | output distance matrix (size n*m) |
 | `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance to evaluate |
-| `metric_arg` | in | `double` | metric argument (used for Minkowski distance) Default: `2.0f`. |
+| `metric_arg` | in | `double` | metric argument (used for Minkowski distance)<br />Default: `2.0f`. |
 
 **Returns**
 
@@ -220,7 +220,7 @@ Usage example:
 | `y` | in | `raft::device_matrix_view<const half, std::int64_t, raft::layout_c_contiguous> const` | second set of points (size m*k) |
 | `dist` | out | `raft::device_matrix_view<float, std::int64_t, raft::layout_c_contiguous>` | output distance matrix (size n*m) |
 | `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance to evaluate |
-| `metric_arg` | in | `float` | metric argument (used for Minkowski distance) Default: `2.0f`. |
+| `metric_arg` | in | `float` | metric argument (used for Minkowski distance)<br />Default: `2.0f`. |
 
 **Returns**
 
@@ -253,7 +253,7 @@ Usage example:
 | `y` | in | `raft::device_matrix_view<const float, std::int64_t, raft::layout_f_contiguous> const` | second set of points (size m*k) |
 | `dist` | out | `raft::device_matrix_view<float, std::int64_t, raft::layout_f_contiguous>` | output distance matrix (size n*m) |
 | `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance to evaluate |
-| `metric_arg` | in | `float` | metric argument (used for Minkowski distance) Default: `2.0f`. |
+| `metric_arg` | in | `float` | metric argument (used for Minkowski distance)<br />Default: `2.0f`. |
 
 **Returns**
 
@@ -286,7 +286,7 @@ Usage example:
 | `y` | in | `raft::device_matrix_view<const double, std::int64_t, raft::layout_f_contiguous> const` | second set of points (size m*k) |
 | `dist` | out | `raft::device_matrix_view<double, std::int64_t, raft::layout_f_contiguous>` | output distance matrix (size n*m) |
 | `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance to evaluate |
-| `metric_arg` | in | `double` | metric argument (used for Minkowski distance) Default: `2.0f`. |
+| `metric_arg` | in | `double` | metric argument (used for Minkowski distance)<br />Default: `2.0f`. |
 
 **Returns**
 
@@ -319,7 +319,7 @@ Usage example:
 | `y` | in | `raft::device_matrix_view<const half, std::int64_t, raft::layout_f_contiguous> const` | second set of points (size m*k) |
 | `dist` | out | `raft::device_matrix_view<float, std::int64_t, raft::layout_f_contiguous>` | output distance matrix (size n*m) |
 | `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance to evaluate |
-| `metric_arg` | in | `float` | metric argument (used for Minkowski distance) Default: `2.0f`. |
+| `metric_arg` | in | `float` | metric argument (used for Minkowski distance)<br />Default: `2.0f`. |
 
 **Returns**
 
@@ -347,7 +347,7 @@ float metric_arg = 2.0f);
 | `y` | in | `raft::device_csr_matrix_view<const float, int, int, int>` | raft::device_csr_matrix_view |
 | `dist` | out | `raft::device_matrix_view<float, int, raft::row_major>` | raft::device_matrix_view dense matrix |
 | `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use |
-| `metric_arg` | in | `float` | metric argument (used for Minkowski distance) Default: `2.0f`. |
+| `metric_arg` | in | `float` | metric argument (used for Minkowski distance)<br />Default: `2.0f`. |
 
 **Returns**
 
@@ -375,7 +375,7 @@ float metric_arg = 2.0f);
 | `y` | in | `raft::device_csr_matrix_view<const double, int, int, int>` | raft::device_csr_matrix_view |
 | `dist` | out | `raft::device_matrix_view<double, int, raft::row_major>` | raft::device_matrix_view dense matrix |
 | `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use |
-| `metric_arg` | in | `float` | metric argument (used for Minkowski distance) Default: `2.0f`. |
+| `metric_arg` | in | `float` | metric argument (used for Minkowski distance)<br />Default: `2.0f`. |
 
 **Returns**
 
diff --git a/fern/pages/cpp_api/cpp-api-neighbors-all-neighbors.md b/fern/pages/cpp_api/cpp-api-neighbors-all-neighbors.md
index 90fe26ea51..5569e3ffbc 100644
--- a/fern/pages/cpp_api/cpp-api-neighbors-all-neighbors.md
+++ b/fern/pages/cpp_api/cpp-api-neighbors-all-neighbors.md
@@ -70,9 +70,9 @@ Usage example:
 | `params` | in | [`const all_neighbors_params&`](/api-reference/cpp-api-neighbors-all-neighbors#neighbors-all-neighbors-all-neighbors-params) | an instance of all_neighbors::all_neighbors_params that are parameters to build all-neighbors knn graph |
 | `dataset` | in | `raft::host_matrix_view<const float, int64_t, row_major>` | raft::host_matrix_view input dataset expected to be located in host memory |
 | `indices` | out | `raft::device_matrix_view<int64_t, int64_t, row_major>` | nearest neighbor indices of shape [n_row x k] |
-| `distances` | out | `std::optional<raft::device_matrix_view<float, int64_t, row_major>>` | nearest neighbor distances [n_row x k] Default: `std::nullopt`. |
-| `core_distances` | out | `std::optional<raft::device_vector_view<float, int64_t, row_major>>` | array for core distances of size [n_row]. Requires distances matrix to compute core_distances. If core_distances is given, the resulting indices and distances will be mutual reachability space. Default: `std::nullopt`. |
-| `alpha` | in | `float` | distance scaling parameter as used in robust single linkage. Default: `1.0`. |
+| `distances` | out | `std::optional<raft::device_matrix_view<float, int64_t, row_major>>` | nearest neighbor distances [n_row x k]<br />Default: `std::nullopt`. |
+| `core_distances` | out | `std::optional<raft::device_vector_view<float, int64_t, row_major>>` | array for core distances of size [n_row]. Requires distances matrix to compute core_distances. If core_distances is given, the resulting indices and distances will be mutual reachability space.<br />Default: `std::nullopt`. |
+| `alpha` | in | `float` | distance scaling parameter as used in robust single linkage.<br />Default: `1.0`. |
 
 **Returns**
 
@@ -103,9 +103,9 @@ Usage example:
 | `params` | in | [`const all_neighbors_params&`](/api-reference/cpp-api-neighbors-all-neighbors#neighbors-all-neighbors-all-neighbors-params) | an instance of all_neighbors::all_neighbors_params that are parameters to build all-neighbors knn graph |
 | `dataset` | in | `raft::device_matrix_view<const float, int64_t, row_major>` | raft::device_matrix_view input dataset expected to be located in device memory |
 | `indices` | out | `raft::device_matrix_view<int64_t, int64_t, row_major>` | nearest neighbor indices of shape [n_row x k] |
-| `distances` | out | `std::optional<raft::device_matrix_view<float, int64_t, row_major>>` | nearest neighbor distances [n_row x k] Default: `std::nullopt`. |
-| `core_distances` | out | `std::optional<raft::device_vector_view<float, int64_t, row_major>>` | array for core distances of size [n_row]. Requires distances matrix to compute core_distances. If core_distances is given, the resulting indices and distances will be mutual reachability space. Default: `std::nullopt`. |
-| `alpha` | in | `float` | distance scaling parameter as used in robust single linkage. Default: `1.0`. |
+| `distances` | out | `std::optional<raft::device_matrix_view<float, int64_t, row_major>>` | nearest neighbor distances [n_row x k]<br />Default: `std::nullopt`. |
+| `core_distances` | out | `std::optional<raft::device_vector_view<float, int64_t, row_major>>` | array for core distances of size [n_row]. Requires distances matrix to compute core_distances. If core_distances is given, the resulting indices and distances will be mutual reachability space.<br />Default: `std::nullopt`. |
+| `alpha` | in | `float` | distance scaling parameter as used in robust single linkage.<br />Default: `1.0`. |
 
 **Returns**
 
diff --git a/fern/pages/cpp_api/cpp-api-neighbors-brute-force.md b/fern/pages/cpp_api/cpp-api-neighbors-brute-force.md
index a454823018..1bab0cbd05 100644
--- a/fern/pages/cpp_api/cpp-api-neighbors-brute-force.md
+++ b/fern/pages/cpp_api/cpp-api-neighbors-brute-force.md
@@ -567,7 +567,7 @@ The serialization format can be subject to changes, therefore loading an index s
 | `handle` | in | `raft::resources const&` | the raft handle |
 | `filename` | in | `const std::string&` | the file name for saving the index |
 | `index` | in | [`const cuvs::neighbors::brute_force::index<half, float>&`](/api-reference/cpp-api-neighbors-brute-force#neighbors-brute-force-index) | brute force index |
-| `include_dataset` | in | `bool` | whether to include the dataset in the serialized output Default: `true`. |
+| `include_dataset` | in | `bool` | whether to include the dataset in the serialized output<br />Default: `true`. |
 
 **Returns**
 
@@ -599,7 +599,7 @@ The serialization format can be subject to changes, therefore loading an index s
 | `handle` | in | `raft::resources const&` | the raft handle |
 | `filename` | in | `const std::string&` | the file name for saving the index |
 | `index` | in | [`const cuvs::neighbors::brute_force::index<float, float>&`](/api-reference/cpp-api-neighbors-brute-force#neighbors-brute-force-index) | brute force index |
-| `include_dataset` | in | `bool` | whether to include the dataset in the serialized output Default: `true`. |
+| `include_dataset` | in | `bool` | whether to include the dataset in the serialized output<br />Default: `true`. |
 
 **Returns**
 
@@ -625,7 +625,7 @@ The serialization format can be subject to changes, therefore loading an index s
 | `handle` | in | `raft::resources const&` | the raft handle |
 | `os` | in | `std::ostream&` | output stream |
 | `index` | in | [`const cuvs::neighbors::brute_force::index<half, float>&`](/api-reference/cpp-api-neighbors-brute-force#neighbors-brute-force-index) | brute force index |
-| `include_dataset` | in | `bool` | Whether or not to write out the dataset to the file. Default: `true`. |
+| `include_dataset` | in | `bool` | Whether or not to write out the dataset to the file.<br />Default: `true`. |
 
 **Returns**
 
@@ -651,7 +651,7 @@ The serialization format can be subject to changes, therefore loading an index s
 | `handle` | in | `raft::resources const&` | the raft handle |
 | `os` | in | `std::ostream&` | output stream |
 | `index` | in | [`const cuvs::neighbors::brute_force::index<float, float>&`](/api-reference/cpp-api-neighbors-brute-force#neighbors-brute-force-index) | brute force index |
-| `include_dataset` | in | `bool` | Whether or not to write out the dataset to the file. Default: `true`. |
+| `include_dataset` | in | `bool` | Whether or not to write out the dataset to the file.<br />Default: `true`. |
 
 **Returns**
 
diff --git a/fern/pages/cpp_api/cpp-api-neighbors-cagra.md b/fern/pages/cpp_api/cpp-api-neighbors-cagra.md
index 1e4430a5ee..51e4e27af2 100644
--- a/fern/pages/cpp_api/cpp-api-neighbors-cagra.md
+++ b/fern/pages/cpp_api/cpp-api-neighbors-cagra.md
@@ -108,8 +108,8 @@ Usage example:
 | `dataset` |  | `raft::matrix_extent<int64_t>` | The shape of the input dataset |
 | `M` |  | `int` | HNSW index parameter M |
 | `ef_construction` |  | `int` | HNSW index parameter ef_construction |
-| `heuristic` |  | [`hnsw_heuristic_type`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-hnsw-heuristic-type) | The heuristic to use for selecting the graph build parameters Default: `hnsw_heuristic_type::SIMILAR_SEARCH_PERFORMANCE`. |
-| `metric` |  | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | The distance metric to search Default: `cuvs::distance::DistanceType::L2Expanded`. |
+| `heuristic` |  | [`hnsw_heuristic_type`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-hnsw-heuristic-type) | The heuristic to use for selecting the graph build parameters<br />Default: `hnsw_heuristic_type::SIMILAR_SEARCH_PERFORMANCE`. |
+| `metric` |  | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | The distance metric to search<br />Default: `cuvs::distance::DistanceType::L2Expanded`. |
 
 **Returns**
 
@@ -951,8 +951,8 @@ Usage example:
 | `params` | in | [`const cagra::extend_params&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-extend-params) | extend params |
 | `additional_dataset` | in | `raft::device_matrix_view<const float, int64_t, raft::row_major>` | additional dataset on device memory |
 | `idx` | in,out | [`cuvs::neighbors::cagra::index<float, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | CAGRA index |
-| `new_dataset_buffer_view` | out | `std::optional<raft::device_matrix_view<float, int64_t, raft::layout_stride>>` | memory buffer view for the dataset including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets, cols must be the dimension of the dataset, and the stride must be the same as the original index dataset. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the dataset themselves. Default: `std::nullopt`. |
-| `new_graph_buffer_view` | out | `std::optional<raft::device_matrix_view<uint32_t, int64_t>>` | memory buffer view for the graph including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets and cols must be the graph degree. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the graph themselves. Default: `std::nullopt`. |
+| `new_dataset_buffer_view` | out | `std::optional<raft::device_matrix_view<float, int64_t, raft::layout_stride>>` | memory buffer view for the dataset including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets, cols must be the dimension of the dataset, and the stride must be the same as the original index dataset. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the dataset themselves.<br />Default: `std::nullopt`. |
+| `new_graph_buffer_view` | out | `std::optional<raft::device_matrix_view<uint32_t, int64_t>>` | memory buffer view for the graph including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets and cols must be the graph degree. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the graph themselves.<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -983,8 +983,8 @@ Usage example:
 | `params` | in | [`const cagra::extend_params&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-extend-params) | extend params |
 | `additional_dataset` | in | `raft::host_matrix_view<const float, int64_t, raft::row_major>` | additional dataset on host memory |
 | `idx` | in,out | [`cuvs::neighbors::cagra::index<float, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | CAGRA index |
-| `new_dataset_buffer_view` | out | `std::optional<raft::device_matrix_view<float, int64_t, raft::layout_stride>>` | memory buffer view for the dataset including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets, cols must be the dimension of the dataset, and the stride must be the same as the original index dataset. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the dataset themselves. Default: `std::nullopt`. |
-| `new_graph_buffer_view` | out | `std::optional<raft::device_matrix_view<uint32_t, int64_t>>` | memory buffer view for the graph including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets and cols must be the graph degree. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the graph themselves. Default: `std::nullopt`. |
+| `new_dataset_buffer_view` | out | `std::optional<raft::device_matrix_view<float, int64_t, raft::layout_stride>>` | memory buffer view for the dataset including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets, cols must be the dimension of the dataset, and the stride must be the same as the original index dataset. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the dataset themselves.<br />Default: `std::nullopt`. |
+| `new_graph_buffer_view` | out | `std::optional<raft::device_matrix_view<uint32_t, int64_t>>` | memory buffer view for the graph including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets and cols must be the graph degree. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the graph themselves.<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -1015,8 +1015,8 @@ Usage example:
 | `params` | in | [`const cagra::extend_params&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-extend-params) | extend params |
 | `additional_dataset` | in | `raft::device_matrix_view<const half, int64_t, raft::row_major>` | additional dataset on device memory |
 | `idx` | in,out | [`cuvs::neighbors::cagra::index<half, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | CAGRA index |
-| `new_dataset_buffer_view` | out | `std::optional<raft::device_matrix_view<half, int64_t, raft::layout_stride>>` | memory buffer view for the dataset including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets, cols must be the dimension of the dataset, and the stride must be the same as the original index dataset. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the dataset themselves. Default: `std::nullopt`. |
-| `new_graph_buffer_view` | out | `std::optional<raft::device_matrix_view<uint32_t, int64_t>>` | memory buffer view for the graph including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets and cols must be the graph degree. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the graph themselves. Default: `std::nullopt`. |
+| `new_dataset_buffer_view` | out | `std::optional<raft::device_matrix_view<half, int64_t, raft::layout_stride>>` | memory buffer view for the dataset including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets, cols must be the dimension of the dataset, and the stride must be the same as the original index dataset. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the dataset themselves.<br />Default: `std::nullopt`. |
+| `new_graph_buffer_view` | out | `std::optional<raft::device_matrix_view<uint32_t, int64_t>>` | memory buffer view for the graph including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets and cols must be the graph degree. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the graph themselves.<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -1047,8 +1047,8 @@ Usage example:
 | `params` | in | [`const cagra::extend_params&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-extend-params) | extend params |
 | `additional_dataset` | in | `raft::host_matrix_view<const half, int64_t, raft::row_major>` | additional dataset on host memory |
 | `idx` | in,out | [`cuvs::neighbors::cagra::index<half, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | CAGRA index |
-| `new_dataset_buffer_view` | out | `std::optional<raft::device_matrix_view<half, int64_t, raft::layout_stride>>` | memory buffer view for the dataset including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets, cols must be the dimension of the dataset, and the stride must be the same as the original index dataset. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the dataset themselves. Default: `std::nullopt`. |
-| `new_graph_buffer_view` | out | `std::optional<raft::device_matrix_view<uint32_t, int64_t>>` | memory buffer view for the graph including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets and cols must be the graph degree. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the graph themselves. Default: `std::nullopt`. |
+| `new_dataset_buffer_view` | out | `std::optional<raft::device_matrix_view<half, int64_t, raft::layout_stride>>` | memory buffer view for the dataset including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets, cols must be the dimension of the dataset, and the stride must be the same as the original index dataset. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the dataset themselves.<br />Default: `std::nullopt`. |
+| `new_graph_buffer_view` | out | `std::optional<raft::device_matrix_view<uint32_t, int64_t>>` | memory buffer view for the graph including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets and cols must be the graph degree. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the graph themselves.<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -1079,8 +1079,8 @@ Usage example:
 | `params` | in | [`const cagra::extend_params&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-extend-params) | extend params |
 | `additional_dataset` | in | `raft::device_matrix_view<const int8_t, int64_t, raft::row_major>` | additional dataset on device memory |
 | `idx` | in,out | [`cuvs::neighbors::cagra::index<int8_t, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | CAGRA index |
-| `new_dataset_buffer_view` | out | `std::optional<raft::device_matrix_view<int8_t, int64_t, raft::layout_stride>>` | memory buffer view for the dataset including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets, cols must be the dimension of the dataset, and the stride must be the same as the original index dataset. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the dataset themselves. Default: `std::nullopt`. |
-| `new_graph_buffer_view` | out | `std::optional<raft::device_matrix_view<uint32_t, int64_t>>` | memory buffer view for the graph including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets and cols must be the graph degree. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the graph themselves. Default: `std::nullopt`. |
+| `new_dataset_buffer_view` | out | `std::optional<raft::device_matrix_view<int8_t, int64_t, raft::layout_stride>>` | memory buffer view for the dataset including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets, cols must be the dimension of the dataset, and the stride must be the same as the original index dataset. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the dataset themselves.<br />Default: `std::nullopt`. |
+| `new_graph_buffer_view` | out | `std::optional<raft::device_matrix_view<uint32_t, int64_t>>` | memory buffer view for the graph including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets and cols must be the graph degree. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the graph themselves.<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -1111,8 +1111,8 @@ Usage example:
 | `params` | in | [`const cagra::extend_params&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-extend-params) | extend params |
 | `additional_dataset` | in | `raft::host_matrix_view<const int8_t, int64_t, raft::row_major>` | additional dataset on host memory |
 | `idx` | in,out | [`cuvs::neighbors::cagra::index<int8_t, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | CAGRA index |
-| `new_dataset_buffer_view` | out | `std::optional<raft::device_matrix_view<int8_t, int64_t, raft::layout_stride>>` | memory buffer view for the dataset including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets, cols must be the dimension of the dataset, and the stride must be the same as the original index dataset. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the dataset themselves. Default: `std::nullopt`. |
-| `new_graph_buffer_view` | out | `std::optional<raft::device_matrix_view<uint32_t, int64_t>>` | memory buffer view for the graph including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets and cols must be the graph degree. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the graph themselves. Default: `std::nullopt`. |
+| `new_dataset_buffer_view` | out | `std::optional<raft::device_matrix_view<int8_t, int64_t, raft::layout_stride>>` | memory buffer view for the dataset including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets, cols must be the dimension of the dataset, and the stride must be the same as the original index dataset. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the dataset themselves.<br />Default: `std::nullopt`. |
+| `new_graph_buffer_view` | out | `std::optional<raft::device_matrix_view<uint32_t, int64_t>>` | memory buffer view for the graph including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets and cols must be the graph degree. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the graph themselves.<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -1143,8 +1143,8 @@ Usage example:
 | `params` | in | [`const cagra::extend_params&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-extend-params) | extend params |
 | `additional_dataset` | in | `raft::device_matrix_view<const uint8_t, int64_t, raft::row_major>` | additional dataset on host memory |
 | `idx` | in,out | [`cuvs::neighbors::cagra::index<uint8_t, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | CAGRA index |
-| `new_dataset_buffer_view` | out | `std::optional<raft::device_matrix_view<uint8_t, int64_t, raft::layout_stride>>` | memory buffer view for the dataset including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets, cols must be the dimension of the dataset, and the stride must be the same as the original index dataset. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the dataset themselves. Default: `std::nullopt`. |
-| `new_graph_buffer_view` | out | `std::optional<raft::device_matrix_view<uint32_t, int64_t>>` | memory buffer view for the graph including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets and cols must be the graph degree. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the graph themselves. Default: `std::nullopt`. |
+| `new_dataset_buffer_view` | out | `std::optional<raft::device_matrix_view<uint8_t, int64_t, raft::layout_stride>>` | memory buffer view for the dataset including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets, cols must be the dimension of the dataset, and the stride must be the same as the original index dataset. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the dataset themselves.<br />Default: `std::nullopt`. |
+| `new_graph_buffer_view` | out | `std::optional<raft::device_matrix_view<uint32_t, int64_t>>` | memory buffer view for the graph including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets and cols must be the graph degree. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the graph themselves.<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -1175,8 +1175,8 @@ Usage example:
 | `params` | in | [`const cagra::extend_params&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-extend-params) | extend params |
 | `additional_dataset` | in | `raft::host_matrix_view<const uint8_t, int64_t, raft::row_major>` | additional dataset on host memory |
 | `idx` | in,out | [`cuvs::neighbors::cagra::index<uint8_t, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | CAGRA index |
-| `new_dataset_buffer_view` | out | `std::optional<raft::device_matrix_view<uint8_t, int64_t, raft::layout_stride>>` | memory buffer view for the dataset including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets, cols must be the dimension of the dataset, and the stride must be the same as the original index dataset. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the dataset themselves. Default: `std::nullopt`. |
-| `new_graph_buffer_view` | out | `std::optional<raft::device_matrix_view<uint32_t, int64_t>>` | memory buffer view for the graph including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets and cols must be the graph degree. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the graph themselves. Default: `std::nullopt`. |
+| `new_dataset_buffer_view` | out | `std::optional<raft::device_matrix_view<uint8_t, int64_t, raft::layout_stride>>` | memory buffer view for the dataset including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets, cols must be the dimension of the dataset, and the stride must be the same as the original index dataset. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the dataset themselves.<br />Default: `std::nullopt`. |
+| `new_graph_buffer_view` | out | `std::optional<raft::device_matrix_view<uint32_t, int64_t>>` | memory buffer view for the graph including the additional part. The data will be copied from the current index in this function. The num rows must be the sum of the original and additional datasets and cols must be the graph degree. This view will be stored in the output index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. This option is useful when users want to manage the memory space for the graph themselves.<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -1223,7 +1223,7 @@ Experimental, both the API and the serialization format are subject to change.
 | `handle` | in | `raft::resources const&` | the raft handle |
 | `filename` | in | `const std::string&` | the file name for saving the index |
 | `index` | in | [`const cuvs::neighbors::cagra::index<float, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | CAGRA index |
-| `include_dataset` | in | `bool` | Whether or not to write out the dataset to the file. Default: `true`. |
+| `include_dataset` | in | `bool` | Whether or not to write out the dataset to the file.<br />Default: `true`. |
 
 **Returns**
 
@@ -1274,7 +1274,7 @@ Experimental, both the API and the serialization format are subject to change.
 | `handle` | in | `raft::resources const&` | the raft handle |
 | `os` | in | `std::ostream&` | output stream |
 | `index` | in | [`const cuvs::neighbors::cagra::index<float, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | CAGRA index |
-| `include_dataset` | in | `bool` | Whether or not to write out the dataset to the file. Default: `true`. |
+| `include_dataset` | in | `bool` | Whether or not to write out the dataset to the file.<br />Default: `true`. |
 
 **Returns**
 
@@ -1324,7 +1324,7 @@ Experimental, both the API and the serialization format are subject to change.
 | `handle` | in | `raft::resources const&` | the raft handle |
 | `filename` | in | `const std::string&` | the file name for saving the index |
 | `index` | in | [`const cuvs::neighbors::cagra::index<half, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | CAGRA index |
-| `include_dataset` | in | `bool` | Whether or not to write out the dataset to the file. Default: `true`. |
+| `include_dataset` | in | `bool` | Whether or not to write out the dataset to the file.<br />Default: `true`. |
 
 **Returns**
 
@@ -1374,7 +1374,7 @@ Experimental, both the API and the serialization format are subject to change.
 | `handle` | in | `raft::resources const&` | the raft handle |
 | `os` | in | `std::ostream&` | output stream |
 | `index` | in | [`const cuvs::neighbors::cagra::index<half, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | CAGRA index |
-| `include_dataset` | in | `bool` | Whether or not to write out the dataset to the file. Default: `true`. |
+| `include_dataset` | in | `bool` | Whether or not to write out the dataset to the file.<br />Default: `true`. |
 
 **Returns**
 
@@ -1424,7 +1424,7 @@ Experimental, both the API and the serialization format are subject to change.
 | `handle` | in | `raft::resources const&` | the raft handle |
 | `filename` | in | `const std::string&` | the file name for saving the index |
 | `index` | in | [`const cuvs::neighbors::cagra::index<int8_t, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | CAGRA index |
-| `include_dataset` | in | `bool` | Whether or not to write out the dataset to the file. Default: `true`. |
+| `include_dataset` | in | `bool` | Whether or not to write out the dataset to the file.<br />Default: `true`. |
 
 **Returns**
 
@@ -1474,7 +1474,7 @@ Experimental, both the API and the serialization format are subject to change.
 | `handle` | in | `raft::resources const&` | the raft handle |
 | `os` | in | `std::ostream&` | output stream |
 | `index` | in | [`const cuvs::neighbors::cagra::index<int8_t, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | CAGRA index |
-| `include_dataset` | in | `bool` | Whether or not to write out the dataset to the file. Default: `true`. |
+| `include_dataset` | in | `bool` | Whether or not to write out the dataset to the file.<br />Default: `true`. |
 
 **Returns**
 
@@ -1524,7 +1524,7 @@ Experimental, both the API and the serialization format are subject to change.
 | `handle` | in | `raft::resources const&` | the raft handle |
 | `filename` | in | `const std::string&` | the file name for saving the index |
 | `index` | in | [`const cuvs::neighbors::cagra::index<uint8_t, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | CAGRA index |
-| `include_dataset` | in | `bool` | Whether or not to write out the dataset to the file. Default: `true`. |
+| `include_dataset` | in | `bool` | Whether or not to write out the dataset to the file.<br />Default: `true`. |
 
 **Returns**
 
@@ -1574,7 +1574,7 @@ Experimental, both the API and the serialization format are subject to change.
 | `handle` | in | `raft::resources const&` | the raft handle |
 | `os` | in | `std::ostream&` | output stream |
 | `index` | in | [`const cuvs::neighbors::cagra::index<uint8_t, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | CAGRA index |
-| `include_dataset` | in | `bool` | Whether or not to write out the dataset to the file. Default: `true`. |
+| `include_dataset` | in | `bool` | Whether or not to write out the dataset to the file.<br />Default: `true`. |
 
 **Returns**
 
@@ -1629,7 +1629,7 @@ Experimental, both the API and the serialization format are subject to change.
 | `handle` | in | `raft::resources const&` | the raft handle |
 | `os` | in | `std::ostream&` | output stream |
 | `index` | in | [`const cuvs::neighbors::cagra::index<float, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | CAGRA index |
-| `dataset` | in | `std::optional<raft::host_matrix_view<const float, int64_t, raft::row_major>>` | [optional] host array that stores the dataset, required if the index does not contain the dataset. Default: `std::nullopt`. |
+| `dataset` | in | `std::optional<raft::host_matrix_view<const float, int64_t, raft::row_major>>` | [optional] host array that stores the dataset, required if the index does not contain the dataset.<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -1659,7 +1659,7 @@ Experimental, both the API and the serialization format are subject to change.
 | `handle` | in | `raft::resources const&` | the raft handle |
 | `filename` | in | `const std::string&` | the file name for saving the index |
 | `index` | in | [`const cuvs::neighbors::cagra::index<float, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | CAGRA index |
-| `dataset` | in | `std::optional<raft::host_matrix_view<const float, int64_t, raft::row_major>>` | [optional] host array that stores the dataset, required if the index does not contain the dataset. Default: `std::nullopt`. |
+| `dataset` | in | `std::optional<raft::host_matrix_view<const float, int64_t, raft::row_major>>` | [optional] host array that stores the dataset, required if the index does not contain the dataset.<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -1689,7 +1689,7 @@ Experimental, both the API and the serialization format are subject to change.
 | `handle` | in | `raft::resources const&` | the raft handle |
 | `os` | in | `std::ostream&` | output stream |
 | `index` | in | [`const cuvs::neighbors::cagra::index<half, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | CAGRA index |
-| `dataset` | in | `std::optional<raft::host_matrix_view<const half, int64_t, raft::row_major>>` | [optional] host array that stores the dataset, required if the index does not contain the dataset. Default: `std::nullopt`. |
+| `dataset` | in | `std::optional<raft::host_matrix_view<const half, int64_t, raft::row_major>>` | [optional] host array that stores the dataset, required if the index does not contain the dataset.<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -1719,7 +1719,7 @@ Experimental, both the API and the serialization format are subject to change.
 | `handle` | in | `raft::resources const&` | the raft handle |
 | `filename` | in | `const std::string&` | the file name for saving the index |
 | `index` | in | [`const cuvs::neighbors::cagra::index<half, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | CAGRA index |
-| `dataset` | in | `std::optional<raft::host_matrix_view<const half, int64_t, raft::row_major>>` | [optional] host array that stores the dataset, required if the index does not contain the dataset. Default: `std::nullopt`. |
+| `dataset` | in | `std::optional<raft::host_matrix_view<const half, int64_t, raft::row_major>>` | [optional] host array that stores the dataset, required if the index does not contain the dataset.<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -1749,7 +1749,7 @@ Experimental, both the API and the serialization format are subject to change.
 | `handle` | in | `raft::resources const&` | the raft handle |
 | `os` | in | `std::ostream&` | output stream |
 | `index` | in | [`const cuvs::neighbors::cagra::index<int8_t, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | CAGRA index |
-| `dataset` | in | `std::optional<raft::host_matrix_view<const int8_t, int64_t, raft::row_major>>` | [optional] host array that stores the dataset, required if the index does not contain the dataset. Default: `std::nullopt`. |
+| `dataset` | in | `std::optional<raft::host_matrix_view<const int8_t, int64_t, raft::row_major>>` | [optional] host array that stores the dataset, required if the index does not contain the dataset.<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -1779,7 +1779,7 @@ Experimental, both the API and the serialization format are subject to change.
 | `handle` | in | `raft::resources const&` | the raft handle |
 | `filename` | in | `const std::string&` | the file name for saving the index |
 | `index` | in | [`const cuvs::neighbors::cagra::index<int8_t, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | CAGRA index |
-| `dataset` | in | `std::optional<raft::host_matrix_view<const int8_t, int64_t, raft::row_major>>` | [optional] host array that stores the dataset, required if the index does not contain the dataset. Default: `std::nullopt`. |
+| `dataset` | in | `std::optional<raft::host_matrix_view<const int8_t, int64_t, raft::row_major>>` | [optional] host array that stores the dataset, required if the index does not contain the dataset.<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -1809,7 +1809,7 @@ Experimental, both the API and the serialization format are subject to change.
 | `handle` | in | `raft::resources const&` | the raft handle |
 | `os` | in | `std::ostream&` | output stream |
 | `index` | in | [`const cuvs::neighbors::cagra::index<uint8_t, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | CAGRA index |
-| `dataset` | in | `std::optional<raft::host_matrix_view<const uint8_t, int64_t, raft::row_major>>` | [optional] host array that stores the dataset, required if the index does not contain the dataset. Default: `std::nullopt`. |
+| `dataset` | in | `std::optional<raft::host_matrix_view<const uint8_t, int64_t, raft::row_major>>` | [optional] host array that stores the dataset, required if the index does not contain the dataset.<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -1839,7 +1839,7 @@ Experimental, both the API and the serialization format are subject to change.
 | `handle` | in | `raft::resources const&` | the raft handle |
 | `filename` | in | `const std::string&` | the file name for saving the index |
 | `index` | in | [`const cuvs::neighbors::cagra::index<uint8_t, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | CAGRA index |
-| `dataset` | in | `std::optional<raft::host_matrix_view<const uint8_t, int64_t, raft::row_major>>` | [optional] host array that stores the dataset, required if the index does not contain the dataset. Default: `std::nullopt`. |
+| `dataset` | in | `std::optional<raft::host_matrix_view<const uint8_t, int64_t, raft::row_major>>` | [optional] host array that stores the dataset, required if the index does not contain the dataset.<br />Default: `std::nullopt`. |
 
 **Returns**
 
diff --git a/fern/pages/cpp_api/cpp-api-neighbors-dynamic-batching.md b/fern/pages/cpp_api/cpp-api-neighbors-dynamic-batching.md
index b929fd898b..29fa965d47 100644
--- a/fern/pages/cpp_api/cpp-api-neighbors-dynamic-batching.md
+++ b/fern/pages/cpp_api/cpp-api-neighbors-dynamic-batching.md
@@ -48,7 +48,7 @@ struct search_params : cuvs::neighbors::search_params {
 
 | Name | Type | Description |
 | --- | --- | --- |
-| `dispatch_timeout_ms` | `double` | How long a request can stay in the queue (milliseconds). Note, this only affects the dispatch time and does not reflect full request latency; the latter depends on the upstream search parameters and the batch size. |
+| `dispatch_timeout_ms` | `double` | How long a request can stay in the queue (milliseconds).<br />Note, this only affects the dispatch time and does not reflect full request latency; the latter depends on the upstream search parameters and the batch size. |
 
 ## Dynamic Batching index type
 
@@ -108,7 +108,7 @@ const cuvs::neighbors::filtering::base_filter* sample_filter = nullptr);
 | `params` | in | [`const cuvs::neighbors::dynamic_batching::index_params&`](/api-reference/cpp-api-neighbors-dynamic-batching#neighbors-dynamic-batching-index-params) | dynamic batching parameters |
 | `upstream_index` | in | `const Upstream&` | the original index to perform the search (the reference must be alive for the lifetime of the dynamic batching index) |
 | `upstream_params` | in | `const typename Upstream::search_params_type&` | the original index search parameters for all queries in a batch (the parameters are captured by value for the lifetime of the dynamic batching index) |
-| `sample_filter` | in | `const cuvs::neighbors::filtering::base_filter*` | filtering function, if any, must be the same for all requests in a batch (the pointer must be alive for the lifetime of the dynamic batching index) Default: `nullptr`. |
+| `sample_filter` | in | `const cuvs::neighbors::filtering::base_filter*` | filtering function, if any, must be the same for all requests in a batch (the pointer must be alive for the lifetime of the dynamic batching index)<br />Default: `nullptr`. |
 
 **Returns**
 
diff --git a/fern/pages/cpp_api/cpp-api-neighbors-epsilon-neighborhood.md b/fern/pages/cpp_api/cpp-api-neighbors-epsilon-neighborhood.md
index ec46d8378d..88e8552610 100644
--- a/fern/pages/cpp_api/cpp-api-neighbors-epsilon-neighborhood.md
+++ b/fern/pages/cpp_api/cpp-api-neighbors-epsilon-neighborhood.md
@@ -44,7 +44,7 @@ Currently, only L2Unexpanded (L2-squared) distance metric is supported. Other me
 | `adj` | out | `raft::device_matrix_view<bool, matrix_idx_t, raft::row_major>` | adjacency matrix [row-major] [on device] [dim = m x n] |
 | `vd` | out | `raft::device_vector_view<idx_t, matrix_idx_t>` | vertex degree array [on device] [len = m + 1] `vd + m` stores the total number of edges in the adjacency matrix. Pass a nullptr if you don't need this info. |
 | `eps` | in | `value_t` | defines epsilon neighborhood radius (should be passed as squared when using L2Unexpanded metric) |
-| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use. Currently only L2Unexpanded is supported. Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
+| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use. Currently only L2Unexpanded is supported.<br />Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
 
 **Returns**
 
diff --git a/fern/pages/cpp_api/cpp-api-neighbors-hnsw.md b/fern/pages/cpp_api/cpp-api-neighbors-hnsw.md
index b88ab1c789..e4a29faed5 100644
--- a/fern/pages/cpp_api/cpp-api-neighbors-hnsw.md
+++ b/fern/pages/cpp_api/cpp-api-neighbors-hnsw.md
@@ -84,7 +84,7 @@ index(int dim, cuvs::distance::DistanceType metric, HnswHierarchy hierarchy = Hn
 | --- | --- | --- | --- |
 | `dim` | in | `int` | dimensions of the training dataset |
 | `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to search. Supported metrics ("L2Expanded", "InnerProduct") |
-| `hierarchy` | in | [`HnswHierarchy`](/api-reference/cpp-api-neighbors-hnsw#neighbors-hnsw-hnswhierarchy) | hierarchy used for upper HNSW layers Default: `HnswHierarchy::NONE`. |
+| `hierarchy` | in | [`HnswHierarchy`](/api-reference/cpp-api-neighbors-hnsw#neighbors-hnsw-hnswhierarchy) | hierarchy used for upper HNSW layers<br />Default: `HnswHierarchy::NONE`. |
 
 **Returns**
 
@@ -286,7 +286,10 @@ Usage example:
 <a id="neighbors-hnsw-from-cagra"></a>
 ### neighbors::hnsw::from_cagra
 
-Construct an hnswlib index from a CAGRA index NOTE: When `hnsw::index_params.hierarchy` is: 1. `NONE`: This method uses the filesystem to write the CAGRA index in `/tmp/&lt;random_number&gt;.bin` before reading it as an hnswlib index, then deleting the temporary file. The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS, as the format is not compatible with the original hnswlib. 2. `CPU`: The returned index is mutable and can be extended with additional vectors. The serialized index is also compatible with the original hnswlib library.
+Construct an hnswlib index from a CAGRA index. NOTE: When `hnsw::index_params.hierarchy` is:
+
+1. `NONE`: This method uses the filesystem to write the CAGRA index in `/tmp/<random_number>.bin` before reading it as an hnswlib index, then deleting the temporary file. The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS, as the format is not compatible with the original hnswlib.
+2. `CPU`: The returned index is mutable and can be extended with additional vectors. The serialized index is also compatible with the original hnswlib library.
 
 ```cpp
 std::unique_ptr<index<float>> from_cagra(
@@ -306,7 +309,7 @@ Usage example:
 | `res` | in | `raft::resources const&` | raft resources |
 | `params` | in | `const index_params&` | hnsw index parameters |
 | `cagra_index` | in | [`const cuvs::neighbors::cagra::index<float, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | cagra index |
-| `dataset` | in | `std::optional<raft::host_matrix_view<const float, int64_t, raft::row_major>>` | optional dataset to avoid extra memory copy when hierarchy is `CPU` Default: `std::nullopt`. |
+| `dataset` | in | `std::optional<raft::host_matrix_view<const float, int64_t, raft::row_major>>` | optional dataset to avoid extra memory copy when hierarchy is `CPU`<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -314,7 +317,10 @@ Usage example:
 
 **Additional overload:** `neighbors::hnsw::from_cagra`
 
-Construct an hnswlib index from a CAGRA index NOTE: When `hnsw::index_params.hierarchy` is: 1. `NONE`: This method uses the filesystem to write the CAGRA index in `/tmp/&lt;random_number&gt;.bin` before reading it as an hnswlib index, then deleting the temporary file. The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS, as the format is not compatible with the original hnswlib. 2. `CPU`: The returned index is mutable and can be extended with additional vectors. The serialized index is also compatible with the original hnswlib library.
+Construct an hnswlib index from a CAGRA index. NOTE: When `hnsw::index_params.hierarchy` is:
+
+1. `NONE`: This method uses the filesystem to write the CAGRA index in `/tmp/<random_number>.bin` before reading it as an hnswlib index, then deleting the temporary file. The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS, as the format is not compatible with the original hnswlib.
+2. `CPU`: The returned index is mutable and can be extended with additional vectors. The serialized index is also compatible with the original hnswlib library.
 
 ```cpp
 std::unique_ptr<index<half>> from_cagra(
@@ -334,7 +340,7 @@ Usage example:
 | `res` | in | `raft::resources const&` | raft resources |
 | `params` | in | `const index_params&` | hnsw index parameters |
 | `cagra_index` | in | [`const cuvs::neighbors::cagra::index<half, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | cagra index |
-| `dataset` | in | `std::optional<raft::host_matrix_view<const half, int64_t, raft::row_major>>` | optional dataset to avoid extra memory copy when hierarchy is `CPU` Default: `std::nullopt`. |
+| `dataset` | in | `std::optional<raft::host_matrix_view<const half, int64_t, raft::row_major>>` | optional dataset to avoid extra memory copy when hierarchy is `CPU`<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -342,7 +348,10 @@ Usage example:
 
 **Additional overload:** `neighbors::hnsw::from_cagra`
 
-Construct an hnswlib index from a CAGRA index NOTE: When `hnsw::index_params.hierarchy` is: 1. `NONE`: This method uses the filesystem to write the CAGRA index in `/tmp/&lt;random_number&gt;.bin` before reading it as an hnswlib index, then deleting the temporary file. The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS, as the format is not compatible with the original hnswlib. 2. `CPU`: The returned index is mutable and can be extended with additional vectors. The serialized index is also compatible with the original hnswlib library.
+Construct an hnswlib index from a CAGRA index. NOTE: When `hnsw::index_params.hierarchy` is:
+
+1. `NONE`: This method uses the filesystem to write the CAGRA index in `/tmp/<random_number>.bin` before reading it as an hnswlib index, then deleting the temporary file. The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS, as the format is not compatible with the original hnswlib.
+2. `CPU`: The returned index is mutable and can be extended with additional vectors. The serialized index is also compatible with the original hnswlib library.
 
 ```cpp
 std::unique_ptr<index<uint8_t>> from_cagra(
@@ -362,7 +371,7 @@ Usage example:
 | `res` | in | `raft::resources const&` | raft resources |
 | `params` | in | `const index_params&` | hnsw index parameters |
 | `cagra_index` | in | [`const cuvs::neighbors::cagra::index<uint8_t, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | cagra index |
-| `dataset` | in | `std::optional<raft::host_matrix_view<const uint8_t, int64_t, raft::row_major>>` | optional dataset to avoid extra memory copy when hierarchy is `CPU` Default: `std::nullopt`. |
+| `dataset` | in | `std::optional<raft::host_matrix_view<const uint8_t, int64_t, raft::row_major>>` | optional dataset to avoid extra memory copy when hierarchy is `CPU`<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -370,7 +379,10 @@ Usage example:
 
 **Additional overload:** `neighbors::hnsw::from_cagra`
 
-Construct an hnswlib index from a CAGRA index NOTE: When `hnsw::index_params.hierarchy` is: 1. `NONE`: This method uses the filesystem to write the CAGRA index in `/tmp/&lt;random_number&gt;.bin` before reading it as an hnswlib index, then deleting the temporary file. The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS, as the format is not compatible with the original hnswlib. 2. `CPU`: The returned index is mutable and can be extended with additional vectors. The serialized index is also compatible with the original hnswlib library.
+Construct an hnswlib index from a CAGRA index. NOTE: When `hnsw::index_params.hierarchy` is:
+
+1. `NONE`: This method uses the filesystem to write the CAGRA index in `/tmp/<random_number>.bin` before reading it as an hnswlib index, then deleting the temporary file. The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS, as the format is not compatible with the original hnswlib.
+2. `CPU`: The returned index is mutable and can be extended with additional vectors. The serialized index is also compatible with the original hnswlib library.
 
 ```cpp
 std::unique_ptr<index<int8_t>> from_cagra(
@@ -390,7 +402,7 @@ Usage example:
 | `res` | in | `raft::resources const&` | raft resources |
 | `params` | in | `const index_params&` | hnsw index parameters |
 | `cagra_index` | in | [`const cuvs::neighbors::cagra::index<int8_t, uint32_t>&`](/api-reference/cpp-api-neighbors-cagra#neighbors-cagra-index) | cagra index |
-| `dataset` | in | `std::optional<raft::host_matrix_view<const int8_t, int64_t, raft::row_major>>` | optional dataset to avoid extra memory copy when hierarchy is `CPU` Default: `std::nullopt`. |
+| `dataset` | in | `std::optional<raft::host_matrix_view<const int8_t, int64_t, raft::row_major>>` | optional dataset to avoid extra memory copy when hierarchy is `CPU`<br />Default: `std::nullopt`. |
 
 **Returns**
 
diff --git a/fern/pages/cpp_api/cpp-api-neighbors-ivf-pq.md b/fern/pages/cpp_api/cpp-api-neighbors-ivf-pq.md
index 6174452b3e..eea6eab87f 100644
--- a/fern/pages/cpp_api/cpp-api-neighbors-ivf-pq.md
+++ b/fern/pages/cpp_api/cpp-api-neighbors-ivf-pq.md
@@ -221,9 +221,9 @@ This constructor creates an owning index with the given parameters.
 | `codebook_kind` |  | [`codebook_gen`](/api-reference/cpp-api-neighbors-ivf-pq#neighbors-ivf-pq-codebook-gen) | How PQ codebooks are created |
 | `n_lists` |  | `uint32_t` | Number of inverted lists (clusters) |
 | `dim` |  | `uint32_t` | Dimensionality of the input data |
-| `pq_bits` |  | `uint32_t` | Bit length of vector elements after PQ compression Default: `8`. |
-| `pq_dim` |  | `uint32_t` | Dimensionality after PQ compression (0 = auto-select) Default: `0`. |
-| `conservative_memory_allocation` |  | `bool` | Memory allocation strategy Default: `false`. |
+| `pq_bits` |  | `uint32_t` | Bit length of vector elements after PQ compression<br />Default: `8`. |
+| `pq_dim` |  | `uint32_t` | Dimensionality after PQ compression (0 = auto-select)<br />Default: `0`. |
+| `conservative_memory_allocation` |  | `bool` | Memory allocation strategy<br />Default: `false`. |
 
 **Returns**
 
diff --git a/fern/pages/cpp_api/cpp-api-neighbors-ivf-sq.md b/fern/pages/cpp_api/cpp-api-neighbors-ivf-sq.md
index 3b33951697..dcd40f182e 100644
--- a/fern/pages/cpp_api/cpp-api-neighbors-ivf-sq.md
+++ b/fern/pages/cpp_api/cpp-api-neighbors-ivf-sq.md
@@ -17,8 +17,8 @@ IVF-SQ index build parameters
 constexpr static uint32_t kIndexGroupSize = 32;
 ```
 
-<a id="cuvs-neighbors-ivf-sq-index-params"></a>
-### cuvs::neighbors::ivf_sq::index_params
+<a id="neighbors-ivf-sq-index-params"></a>
+### neighbors::ivf_sq::index_params
 
 IVF-SQ index build parameters.
 
@@ -46,8 +46,8 @@ struct index_params : cuvs::neighbors::index_params {
 
 ## IVF-SQ index search parameters
 
-<a id="cuvs-neighbors-ivf-sq-search-params"></a>
-### cuvs::neighbors::ivf_sq::search_params
+<a id="neighbors-ivf-sq-search-params"></a>
+### neighbors::ivf_sq::search_params
 
 IVF-SQ index search parameters
 
@@ -65,8 +65,8 @@ struct search_params : cuvs::neighbors::search_params {
 
 ## IVF-SQ list storage spec
 
-<a id="cuvs-neighbors-ivf-sq-list-spec"></a>
-### cuvs::neighbors::ivf_sq::list_spec
+<a id="neighbors-ivf-sq-list-spec"></a>
+### neighbors::ivf_sq::list_spec
 
 IVF-SQ list storage spec
 
@@ -89,8 +89,8 @@ struct list_spec {
 
 ## IVF-SQ index
 
-<a id="cuvs-neighbors-ivf-sq-index"></a>
-### cuvs::neighbors::ivf_sq::index
+<a id="neighbors-ivf-sq-index"></a>
+### neighbors::ivf_sq::index
 
 IVF-SQ index.
 
@@ -117,8 +117,8 @@ struct index;
 
 ## IVF-SQ index build
 
-<a id="cuvs-neighbors-ivf-sq-build"></a>
-### cuvs::neighbors::ivf_sq::build
+<a id="neighbors-ivf-sq-build"></a>
+### neighbors::ivf_sq::build
 
 Build the index from the dataset for efficient search.
 
@@ -143,14 +143,14 @@ Usage example:
 | Name | Direction | Type | Description |
 | --- | --- | --- | --- |
 | `handle` | in | `raft::resources const&` |  |
-| `index_params` | in | [`const cuvs::neighbors::ivf_sq::index_params&`](/api-reference/cpp-api-neighbors-ivf-sq#cuvs-neighbors-ivf-sq-index-params) | configure the index building |
+| `index_params` | in | [`const cuvs::neighbors::ivf_sq::index_params&`](/api-reference/cpp-api-neighbors-ivf-sq#neighbors-ivf-sq-index-params) | configure the index building |
 | `dataset` | in | `raft::device_matrix_view<const float, int64_t, raft::row_major>` | a device pointer to a row-major matrix [n_rows, dim] |
 
 **Returns**
 
-[`cuvs::neighbors::ivf_sq::index<uint8_t>`](/api-reference/cpp-api-neighbors-ivf-sq#cuvs-neighbors-ivf-sq-index)
+[`cuvs::neighbors::ivf_sq::index<uint8_t>`](/api-reference/cpp-api-neighbors-ivf-sq#neighbors-ivf-sq-index)
 
-**Additional overload:** `cuvs::neighbors::ivf_sq::build`
+**Additional overload:** `neighbors::ivf_sq::build`
 
 Build the index from the dataset for efficient search.
 
@@ -168,14 +168,14 @@ Usage example:
 | Name | Direction | Type | Description |
 | --- | --- | --- | --- |
 | `handle` | in | `raft::resources const&` |  |
-| `index_params` | in | [`const cuvs::neighbors::ivf_sq::index_params&`](/api-reference/cpp-api-neighbors-ivf-sq#cuvs-neighbors-ivf-sq-index-params) | configure the index building |
+| `index_params` | in | [`const cuvs::neighbors::ivf_sq::index_params&`](/api-reference/cpp-api-neighbors-ivf-sq#neighbors-ivf-sq-index-params) | configure the index building |
 | `dataset` | in | `raft::device_matrix_view<const half, int64_t, raft::row_major>` | a device pointer to a row-major matrix [n_rows, dim] |
 
 **Returns**
 
-[`cuvs::neighbors::ivf_sq::index<uint8_t>`](/api-reference/cpp-api-neighbors-ivf-sq#cuvs-neighbors-ivf-sq-index)
+[`cuvs::neighbors::ivf_sq::index<uint8_t>`](/api-reference/cpp-api-neighbors-ivf-sq#neighbors-ivf-sq-index)
 
-**Additional overload:** `cuvs::neighbors::ivf_sq::build`
+**Additional overload:** `neighbors::ivf_sq::build`
 
 Build the index from the dataset for efficient search.
 
@@ -193,14 +193,14 @@ Usage example:
 | Name | Direction | Type | Description |
 | --- | --- | --- | --- |
 | `handle` | in | `raft::resources const&` |  |
-| `index_params` | in | [`const cuvs::neighbors::ivf_sq::index_params&`](/api-reference/cpp-api-neighbors-ivf-sq#cuvs-neighbors-ivf-sq-index-params) | configure the index building |
+| `index_params` | in | [`const cuvs::neighbors::ivf_sq::index_params&`](/api-reference/cpp-api-neighbors-ivf-sq#neighbors-ivf-sq-index-params) | configure the index building |
 | `dataset` | in | `raft::host_matrix_view<const float, int64_t, raft::row_major>` | a host pointer to a row-major matrix [n_rows, dim] |
 
 **Returns**
 
-[`cuvs::neighbors::ivf_sq::index<uint8_t>`](/api-reference/cpp-api-neighbors-ivf-sq#cuvs-neighbors-ivf-sq-index)
+[`cuvs::neighbors::ivf_sq::index<uint8_t>`](/api-reference/cpp-api-neighbors-ivf-sq#neighbors-ivf-sq-index)
 
-**Additional overload:** `cuvs::neighbors::ivf_sq::build`
+**Additional overload:** `neighbors::ivf_sq::build`
 
 Build the index from the dataset for efficient search.
 
@@ -218,17 +218,17 @@ Usage example:
 | Name | Direction | Type | Description |
 | --- | --- | --- | --- |
 | `handle` | in | `raft::resources const&` |  |
-| `index_params` | in | [`const cuvs::neighbors::ivf_sq::index_params&`](/api-reference/cpp-api-neighbors-ivf-sq#cuvs-neighbors-ivf-sq-index-params) | configure the index building |
+| `index_params` | in | [`const cuvs::neighbors::ivf_sq::index_params&`](/api-reference/cpp-api-neighbors-ivf-sq#neighbors-ivf-sq-index-params) | configure the index building |
 | `dataset` | in | `raft::host_matrix_view<const half, int64_t, raft::row_major>` | a host pointer to a row-major matrix [n_rows, dim] |
 
 **Returns**
 
-[`cuvs::neighbors::ivf_sq::index<uint8_t>`](/api-reference/cpp-api-neighbors-ivf-sq#cuvs-neighbors-ivf-sq-index)
+[`cuvs::neighbors::ivf_sq::index<uint8_t>`](/api-reference/cpp-api-neighbors-ivf-sq#neighbors-ivf-sq-index)
 
 ## IVF-SQ index extend
 
-<a id="cuvs-neighbors-ivf-sq-extend"></a>
-### cuvs::neighbors::ivf_sq::extend
+<a id="neighbors-ivf-sq-extend"></a>
+### neighbors::ivf_sq::extend
 
 Extend the index with the new data in-place.
 
@@ -248,13 +248,13 @@ Usage example:
 | `handle` | in | `raft::resources const&` |  |
 | `new_vectors` | in | `raft::device_matrix_view<const float, int64_t, raft::row_major>` | a device matrix view to a row-major matrix [n_rows, idx.dim()] |
 | `new_indices` | in | `std::optional<raft::device_vector_view<const int64_t, int64_t>>` | a device vector view to a vector of indices [n_rows]. If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt` here to imply a continuous range `[0...n_rows)`. |
-| `idx` | inout | [`cuvs::neighbors::ivf_sq::index<uint8_t>*`](/api-reference/cpp-api-neighbors-ivf-sq#cuvs-neighbors-ivf-sq-index) | pointer to ivf_sq::index |
+| `idx` | inout | [`cuvs::neighbors::ivf_sq::index<uint8_t>*`](/api-reference/cpp-api-neighbors-ivf-sq#neighbors-ivf-sq-index) | pointer to ivf_sq::index |
 
 **Returns**
 
 `void`
 
-**Additional overload:** `cuvs::neighbors::ivf_sq::extend`
+**Additional overload:** `neighbors::ivf_sq::extend`
 
 Extend the index with the new data in-place.
 
@@ -274,13 +274,13 @@ Usage example:
 | `handle` | in | `raft::resources const&` |  |
 | `new_vectors` | in | `raft::device_matrix_view<const half, int64_t, raft::row_major>` | a device matrix view to a row-major matrix [n_rows, idx.dim()] |
 | `new_indices` | in | `std::optional<raft::device_vector_view<const int64_t, int64_t>>` | a device vector view to a vector of indices [n_rows]. If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt` here to imply a continuous range `[0...n_rows)`. |
-| `idx` | inout | [`cuvs::neighbors::ivf_sq::index<uint8_t>*`](/api-reference/cpp-api-neighbors-ivf-sq#cuvs-neighbors-ivf-sq-index) | pointer to ivf_sq::index |
+| `idx` | inout | [`cuvs::neighbors::ivf_sq::index<uint8_t>*`](/api-reference/cpp-api-neighbors-ivf-sq#neighbors-ivf-sq-index) | pointer to ivf_sq::index |
 
 **Returns**
 
 `void`
 
-**Additional overload:** `cuvs::neighbors::ivf_sq::extend`
+**Additional overload:** `neighbors::ivf_sq::extend`
 
 Extend the index with the new data in-place.
 
@@ -300,13 +300,13 @@ Usage example:
 | `handle` | in | `raft::resources const&` |  |
 | `new_vectors` | in | `raft::host_matrix_view<const float, int64_t, raft::row_major>` | a host matrix view to a row-major matrix [n_rows, idx.dim()] |
 | `new_indices` | in | `std::optional<raft::host_vector_view<const int64_t, int64_t>>` | a host vector view to a vector of indices [n_rows]. If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt` here to imply a continuous range `[0...n_rows)`. |
-| `idx` | inout | [`cuvs::neighbors::ivf_sq::index<uint8_t>*`](/api-reference/cpp-api-neighbors-ivf-sq#cuvs-neighbors-ivf-sq-index) | pointer to ivf_sq::index |
+| `idx` | inout | [`cuvs::neighbors::ivf_sq::index<uint8_t>*`](/api-reference/cpp-api-neighbors-ivf-sq#neighbors-ivf-sq-index) | pointer to ivf_sq::index |
 
 **Returns**
 
 `void`
 
-**Additional overload:** `cuvs::neighbors::ivf_sq::extend`
+**Additional overload:** `neighbors::ivf_sq::extend`
 
 Extend the index with the new data in-place.
 
@@ -326,7 +326,7 @@ Usage example:
 | `handle` | in | `raft::resources const&` |  |
 | `new_vectors` | in | `raft::host_matrix_view<const half, int64_t, raft::row_major>` | a host matrix view to a row-major matrix [n_rows, idx.dim()] |
 | `new_indices` | in | `std::optional<raft::host_vector_view<const int64_t, int64_t>>` | a host vector view to a vector of indices [n_rows]. If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt` here to imply a continuous range `[0...n_rows)`. |
-| `idx` | inout | [`cuvs::neighbors::ivf_sq::index<uint8_t>*`](/api-reference/cpp-api-neighbors-ivf-sq#cuvs-neighbors-ivf-sq-index) | pointer to ivf_sq::index |
+| `idx` | inout | [`cuvs::neighbors::ivf_sq::index<uint8_t>*`](/api-reference/cpp-api-neighbors-ivf-sq#neighbors-ivf-sq-index) | pointer to ivf_sq::index |
 
 **Returns**
 
@@ -334,8 +334,8 @@ Usage example:
 
 ## IVF-SQ index serialize
 
-<a id="cuvs-neighbors-ivf-sq-serialize"></a>
-### cuvs::neighbors::ivf_sq::serialize
+<a id="neighbors-ivf-sq-serialize"></a>
+### neighbors::ivf_sq::serialize
 
 Save the index to file.
 
@@ -353,14 +353,14 @@ Experimental, both the API and the serialization format are subject to change.
 | --- | --- | --- | --- |
 | `handle` | in | `raft::resources const&` | the raft handle |
 | `filename` | in | `const std::string&` | the file name for saving the index |
-| `index` | in | [`const cuvs::neighbors::ivf_sq::index<uint8_t>&`](/api-reference/cpp-api-neighbors-ivf-sq#cuvs-neighbors-ivf-sq-index) | IVF-SQ index |
+| `index` | in | [`const cuvs::neighbors::ivf_sq::index<uint8_t>&`](/api-reference/cpp-api-neighbors-ivf-sq#neighbors-ivf-sq-index) | IVF-SQ index |
 
 **Returns**
 
 `void`
 
-<a id="cuvs-neighbors-ivf-sq-deserialize"></a>
-### cuvs::neighbors::ivf_sq::deserialize
+<a id="neighbors-ivf-sq-deserialize"></a>
+### neighbors::ivf_sq::deserialize
 
 Load index from file.
 
@@ -378,7 +378,7 @@ Experimental, both the API and the serialization format are subject to change.
 | --- | --- | --- | --- |
 | `handle` | in | `raft::resources const&` | the raft handle |
 | `filename` | in | `const std::string&` | the name of the file that stores the index |
-| `index` | out | [`cuvs::neighbors::ivf_sq::index<uint8_t>*`](/api-reference/cpp-api-neighbors-ivf-sq#cuvs-neighbors-ivf-sq-index) | IVF-SQ index |
+| `index` | out | [`cuvs::neighbors::ivf_sq::index<uint8_t>*`](/api-reference/cpp-api-neighbors-ivf-sq#neighbors-ivf-sq-index) | IVF-SQ index |
 
 **Returns**
 
diff --git a/fern/pages/cpp_api/cpp-api-neighbors-nn-descent.md b/fern/pages/cpp_api/cpp-api-neighbors-nn-descent.md
index 1e5eda9894..abbb9cbe58 100644
--- a/fern/pages/cpp_api/cpp-api-neighbors-nn-descent.md
+++ b/fern/pages/cpp_api/cpp-api-neighbors-nn-descent.md
@@ -11,7 +11,7 @@ _Source header: `cuvs/neighbors/nn_descent.hpp`_
 <a id="neighbors-nn-descent-dist-comp-dtype"></a>
 ### neighbors::nn_descent::DIST_COMP_DTYPE
 
-Dtype to use for distance computation - `AUTO`: Automatically determine the best dtype for distance computation based on the dataset dimensions. - `FP32`: Use fp32 distance computation for better precision at the cost of performance and memory usage. - `FP16`: Use fp16 distance computation.
+Dtype to use for distance computation
 
 ```cpp
 enum class DIST_COMP_DTYPE {
@@ -23,16 +23,16 @@ enum class DIST_COMP_DTYPE {
 
 **Values**
 
-| Name | Value |
-| --- | --- |
-| `AUTO` | `0` |
-| `FP32` | `1` |
-| `FP16` | `2` |
+| Name | Value | Description |
+| --- | --- | --- |
+| `AUTO` | `0` | Automatically determine the best dtype for distance computation based on the dataset dimensions. |
+| `FP32` | `1` | Use fp32 distance computation for better precision at the cost of performance and memory usage. |
+| `FP16` | `2` | Use fp16 distance computation. |
 
 <a id="neighbors-nn-descent-index-params"></a>
 ### neighbors::nn_descent::index_params
 
-Parameters used to build an nn-descent index - `graph_degree`: For an input dataset of dimensions (N, D), determines the final dimensions of the all-neighbors knn graph which turns out to be of dimensions (N, graph_degree) - `intermediate_graph_degree`: Internally, nn-descent builds an all-neighbors knn graph of dimensions (N, intermediate_graph_degree) before selecting the final `graph_degree` neighbors. It's recommended that `intermediate_graph_degree` &gt;= 1.5 * graph_degree - `max_iterations`: The number of iterations that nn-descent will refine the graph for. More iterations produce a better quality graph at cost of performance - `termination_threshold`: The delta at which nn-descent will terminate its iterations - `return_distances`: Boolean to decide whether to return distances array - `dist_comp_dtype`: dtype to use for distance computation. Defaults to `AUTO` which automatically determines the best dtype for distance computation based on the dataset dimensions. Use `FP32` for better precision at the cost of performance and memory usage. This option is only valid when data type is fp32. Use `FP16` for better performance and memory usage at the cost of precision.
+Parameters used to build an nn-descent index
 
 ```cpp
 struct index_params : cuvs::neighbors::index_params {
@@ -49,12 +49,12 @@ struct index_params : cuvs::neighbors::index_params {
 
 | Name | Type | Description |
 | --- | --- | --- |
-| `graph_degree` | `size_t` |  |
-| `intermediate_graph_degree` | `size_t` |  |
-| `max_iterations` | `size_t` |  |
-| `termination_threshold` | `float` |  |
-| `return_distances` | `bool` |  |
-| `dist_comp_dtype` | [`DIST_COMP_DTYPE`](/api-reference/cpp-api-neighbors-nn-descent#neighbors-nn-descent-dist-comp-dtype) |  |
+| `graph_degree` | `size_t` | For an input dataset of dimensions (N, D), determines the final dimensions of the all-neighbors knn graph, which are (N, graph_degree) |
+| `intermediate_graph_degree` | `size_t` | Internally, nn-descent builds an all-neighbors knn graph of dimensions (N, intermediate_graph_degree) before selecting the final `graph_degree` neighbors. It's recommended that `intermediate_graph_degree` &gt;= 1.5 * graph_degree |
+| `max_iterations` | `size_t` | The number of iterations that nn-descent will refine the graph for. More iterations produce a better quality graph at the cost of performance |
+| `termination_threshold` | `float` | The delta at which nn-descent will terminate its iterations |
+| `return_distances` | `bool` | Boolean to decide whether to return distances array |
+| `dist_comp_dtype` | [`DIST_COMP_DTYPE`](/api-reference/cpp-api-neighbors-nn-descent#neighbors-nn-descent-dist-comp-dtype) | dtype to use for distance computation.<br />Defaults to `AUTO` which automatically determines the best dtype for distance computation based on the dataset dimensions.<br />Use `FP32` for better precision at the cost of performance and memory usage. This option is only valid when data type is fp32.<br />Use `FP16` for better performance and memory usage at the cost of precision. |
 
 <a id="neighbors-nn-descent-index-params-index-params"></a>
 ### neighbors::nn_descent::index_params::index_params
@@ -70,8 +70,8 @@ cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded);
 
 | Name | Direction | Type | Description |
 | --- | --- | --- | --- |
-| `graph_degree` |  | `size_t` | output graph degree Default: `64`. |
-| `metric` |  | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use Default: `cuvs::distance::DistanceType::L2Expanded`. |
+| `graph_degree` |  | `size_t` | output graph degree<br />Default: `64`. |
+| `metric` |  | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use<br />Default: `cuvs::distance::DistanceType::L2Expanded`. |
 
 **Returns**
 
@@ -113,8 +113,8 @@ This constructor creates an nn-descent index which is a knn-graph in host memory
 | `res` |  | `raft::resources const&` | raft::resources is an object managing resources |
 | `n_rows` |  | `int64_t` | number of rows in knn-graph |
 | `n_cols` |  | `int64_t` | number of cols in knn-graph |
-| `return_distances` |  | `bool` | whether to return distances Default: `false`. |
-| `metric` |  | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use Default: `cuvs::distance::DistanceType::L2Expanded`. |
+| `return_distances` |  | `bool` | whether to return distances<br />Default: `false`. |
+| `metric` |  | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use<br />Default: `cuvs::distance::DistanceType::L2Expanded`. |
 
 **Returns**
 
@@ -140,8 +140,8 @@ This constructor creates an nn-descent index using a user allocated host memory
 | --- | --- | --- | --- |
 | `res` |  | `raft::resources const&` | raft::resources is an object managing resources |
 | `graph_view` |  | `raft::host_matrix_view<IdxT, int64_t, raft::row_major>` | raft::host_matrix_view&lt;IdxT, int64_t, raft::row_major&gt; for storing knn-graph |
-| `distances_view` |  | `std::optional<raft::device_matrix_view<float, int64_t, row_major>>` | optional raft::device_matrix_view&lt;float, int64_t, row_major&gt; for storing distances Default: `std::nullopt`. |
-| `metric` |  | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use Default: `cuvs::distance::DistanceType::L2Expanded`. |
+| `distances_view` |  | `std::optional<raft::device_matrix_view<float, int64_t, row_major>>` | optional raft::device_matrix_view&lt;float, int64_t, row_major&gt; for storing distances<br />Default: `std::nullopt`. |
+| `metric` |  | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use<br />Default: `cuvs::distance::DistanceType::L2Expanded`. |
 
 **Returns**
 
@@ -246,7 +246,7 @@ Usage example:
 | `res` | in | `raft::resources const&` | raft::resources is an object managing resources |
 | `params` | in | [`index_params const&`](/api-reference/cpp-api-neighbors-nn-descent#neighbors-nn-descent-index-params) | an instance of nn_descent::index_params that are parameters to run the nn-descent algorithm |
 | `dataset` | in | `raft::device_matrix_view<const float, int64_t, raft::row_major>` | raft::device_matrix_view input dataset expected to be located in device memory |
-| `graph` | in | `std::optional<raft::host_matrix_view<uint32_t, int64_t, raft::row_major>>` | optional raft::host_matrix_view&lt;uint32_t, int64_t, raft::row_major&gt; for owning the output graph Default: `std::nullopt`. |
+| `graph` | in | `std::optional<raft::host_matrix_view<uint32_t, int64_t, raft::row_major>>` | optional raft::host_matrix_view&lt;uint32_t, int64_t, raft::row_major&gt; for owning the output graph<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -288,7 +288,7 @@ Usage example:
 | `res` |  | `raft::resources const&` | raft::resources is an object managing resources |
 | `params` | in | [`index_params const&`](/api-reference/cpp-api-neighbors-nn-descent#neighbors-nn-descent-index-params) | an instance of nn_descent::index_params that are parameters to run the nn-descent algorithm |
 | `dataset` | in | `raft::host_matrix_view<const float, int64_t, raft::row_major>` | raft::host_matrix_view input dataset expected to be located in host memory |
-| `graph` | in | `std::optional<raft::host_matrix_view<uint32_t, int64_t, raft::row_major>>` | optional raft::host_matrix_view&lt;uint32_t, int64_t, raft::row_major&gt; for owning the output graph Default: `std::nullopt`. |
+| `graph` | in | `std::optional<raft::host_matrix_view<uint32_t, int64_t, raft::row_major>>` | optional raft::host_matrix_view&lt;uint32_t, int64_t, raft::row_major&gt; for owning the output graph<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -323,7 +323,7 @@ Usage example:
 | `res` | in | `raft::resources const&` | raft::resources is an object managing resources |
 | `params` | in | [`index_params const&`](/api-reference/cpp-api-neighbors-nn-descent#neighbors-nn-descent-index-params) | an instance of nn_descent::index_params that are parameters to run the nn-descent algorithm |
 | `dataset` | in | `raft::device_matrix_view<const half, int64_t, raft::row_major>` | raft::device_matrix_view input dataset expected to be located in device memory |
-| `graph` | in | `std::optional<raft::host_matrix_view<uint32_t, int64_t, raft::row_major>>` | optional raft::host_matrix_view&lt;uint32_t, int64_t, raft::row_major&gt; for owning the output graph Default: `std::nullopt`. |
+| `graph` | in | `std::optional<raft::host_matrix_view<uint32_t, int64_t, raft::row_major>>` | optional raft::host_matrix_view&lt;uint32_t, int64_t, raft::row_major&gt; for owning the output graph<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -365,7 +365,7 @@ Usage example:
 | `res` |  | `raft::resources const&` | raft::resources is an object managing resources |
 | `params` | in | [`index_params const&`](/api-reference/cpp-api-neighbors-nn-descent#neighbors-nn-descent-index-params) | an instance of nn_descent::index_params that are parameters to run the nn-descent algorithm |
 | `dataset` | in | `raft::host_matrix_view<const half, int64_t, raft::row_major>` | raft::host_matrix_view input dataset expected to be located in host memory |
-| `graph` | in | `std::optional<raft::host_matrix_view<uint32_t, int64_t, raft::row_major>>` | optional raft::host_matrix_view&lt;uint32_t, int64_t, raft::row_major&gt; for owning the output graph Default: `std::nullopt`. |
+| `graph` | in | `std::optional<raft::host_matrix_view<uint32_t, int64_t, raft::row_major>>` | optional raft::host_matrix_view&lt;uint32_t, int64_t, raft::row_major&gt; for owning the output graph<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -401,7 +401,7 @@ Usage example:
 | `res` | in | `raft::resources const&` | raft::resources is an object managing resources |
 | `params` | in | [`index_params const&`](/api-reference/cpp-api-neighbors-nn-descent#neighbors-nn-descent-index-params) | an instance of nn_descent::index_params that are parameters to run the nn-descent algorithm |
 | `dataset` | in | `raft::device_matrix_view<const int8_t, int64_t, raft::row_major>` | raft::device_matrix_view input dataset expected to be located in device memory |
-| `graph` | in | `std::optional<raft::host_matrix_view<uint32_t, int64_t, raft::row_major>>` | optional raft::host_matrix_view&lt;uint32_t, int64_t, raft::row_major&gt; for owning the output graph Default: `std::nullopt`. |
+| `graph` | in | `std::optional<raft::host_matrix_view<uint32_t, int64_t, raft::row_major>>` | optional raft::host_matrix_view&lt;uint32_t, int64_t, raft::row_major&gt; for owning the output graph<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -444,7 +444,7 @@ Usage example:
 | `res` |  | `raft::resources const&` | raft::resources is an object managing resources |
 | `params` | in | [`index_params const&`](/api-reference/cpp-api-neighbors-nn-descent#neighbors-nn-descent-index-params) | an instance of nn_descent::index_params that are parameters to run the nn-descent algorithm |
 | `dataset` | in | `raft::host_matrix_view<const int8_t, int64_t, raft::row_major>` | raft::host_matrix_view input dataset expected to be located in host memory |
-| `graph` | in | `std::optional<raft::host_matrix_view<uint32_t, int64_t, raft::row_major>>` | optional raft::host_matrix_view&lt;uint32_t, int64_t, raft::row_major&gt; for owning the output graph Default: `std::nullopt`. |
+| `graph` | in | `std::optional<raft::host_matrix_view<uint32_t, int64_t, raft::row_major>>` | optional raft::host_matrix_view&lt;uint32_t, int64_t, raft::row_major&gt; for owning the output graph<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -480,7 +480,7 @@ Usage example:
 | `res` | in | `raft::resources const&` | raft::resources is an object managing resources |
 | `params` | in | [`index_params const&`](/api-reference/cpp-api-neighbors-nn-descent#neighbors-nn-descent-index-params) | an instance of nn_descent::index_params that are parameters to run the nn-descent algorithm |
 | `dataset` | in | `raft::device_matrix_view<const uint8_t, int64_t, raft::row_major>` | raft::device_matrix_view input dataset expected to be located in device memory |
-| `graph` | in | `std::optional<raft::host_matrix_view<uint32_t, int64_t, raft::row_major>>` | optional raft::host_matrix_view&lt;uint32_t, int64_t, raft::row_major&gt; for owning the output graph Default: `std::nullopt`. |
+| `graph` | in | `std::optional<raft::host_matrix_view<uint32_t, int64_t, raft::row_major>>` | optional raft::host_matrix_view&lt;uint32_t, int64_t, raft::row_major&gt; for owning the output graph<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -523,7 +523,7 @@ Usage example:
 | `res` |  | `raft::resources const&` | raft::resources is an object managing resources |
 | `params` | in | [`index_params const&`](/api-reference/cpp-api-neighbors-nn-descent#neighbors-nn-descent-index-params) | an instance of nn_descent::index_params that are parameters to run the nn-descent algorithm |
 | `dataset` | in | `raft::host_matrix_view<const uint8_t, int64_t, raft::row_major>` | raft::host_matrix_view input dataset expected to be located in host memory |
-| `graph` | in | `std::optional<raft::host_matrix_view<uint32_t, int64_t, raft::row_major>>` | optional raft::host_matrix_view&lt;uint32_t, int64_t, raft::row_major&gt; for owning the output graph Default: `std::nullopt`. |
+| `graph` | in | `std::optional<raft::host_matrix_view<uint32_t, int64_t, raft::row_major>>` | optional raft::host_matrix_view&lt;uint32_t, int64_t, raft::row_major&gt; for owning the output graph<br />Default: `std::nullopt`. |
 
 **Returns**
 
diff --git a/fern/pages/cpp_api/cpp-api-neighbors-refine.md b/fern/pages/cpp_api/cpp-api-neighbors-refine.md
index f005e51f1c..763042edb2 100644
--- a/fern/pages/cpp_api/cpp-api-neighbors-refine.md
+++ b/fern/pages/cpp_api/cpp-api-neighbors-refine.md
@@ -39,7 +39,7 @@ Example usage
 | `neighbor_candidates` | in | `raft::device_matrix_view<const int64_t, int64_t, raft::row_major>` | indices of candidate vectors [n_queries, n_candidates], where n_candidates &gt;= k |
 | `indices` | out | `raft::device_matrix_view<int64_t, int64_t, raft::row_major>` | device matrix that stores the refined indices [n_queries, k] |
 | `distances` | out | `raft::device_matrix_view<float, int64_t, raft::row_major>` | device matrix that stores the refined distances [n_queries, k] |
-| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use. Euclidean (L2) is used by default Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
+| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use. Euclidean (L2) is used by default<br />Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
 
 **Returns**
 
@@ -75,7 +75,7 @@ Example usage
 | `neighbor_candidates` | in | `raft::device_matrix_view<const uint32_t, int64_t, raft::row_major>` | indices of candidate vectors [n_queries, n_candidates], where n_candidates &gt;= k |
 | `indices` | out | `raft::device_matrix_view<uint32_t, int64_t, raft::row_major>` | device matrix that stores the refined indices [n_queries, k] |
 | `distances` | out | `raft::device_matrix_view<float, int64_t, raft::row_major>` | device matrix that stores the refined distances [n_queries, k] |
-| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use. Euclidean (L2) is used by default Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
+| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use. Euclidean (L2) is used by default<br />Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
 
 **Returns**
 
@@ -111,7 +111,7 @@ Example usage
 | `neighbor_candidates` | in | `raft::device_matrix_view<const int64_t, int64_t, raft::row_major>` | indices of candidate vectors [n_queries, n_candidates], where n_candidates &gt;= k |
 | `indices` | out | `raft::device_matrix_view<int64_t, int64_t, raft::row_major>` | device matrix that stores the refined indices [n_queries, k] |
 | `distances` | out | `raft::device_matrix_view<float, int64_t, raft::row_major>` | device matrix that stores the refined distances [n_queries, k] |
-| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use. Euclidean (L2) is used by default Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
+| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use. Euclidean (L2) is used by default<br />Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
 
 **Returns**
 
@@ -147,7 +147,7 @@ Example usage
 | `neighbor_candidates` | in | `raft::device_matrix_view<const int64_t, int64_t, raft::row_major>` | indices of candidate vectors [n_queries, n_candidates], where n_candidates &gt;= k |
 | `indices` | out | `raft::device_matrix_view<int64_t, int64_t, raft::row_major>` | device matrix that stores the refined indices [n_queries, k] |
 | `distances` | out | `raft::device_matrix_view<float, int64_t, raft::row_major>` | device matrix that stores the refined distances [n_queries, k] |
-| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use. Euclidean (L2) is used by default Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
+| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use. Euclidean (L2) is used by default<br />Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
 
 **Returns**
 
@@ -183,7 +183,7 @@ Example usage
 | `neighbor_candidates` | in | `raft::device_matrix_view<const int64_t, int64_t, raft::row_major>` | indices of candidate vectors [n_queries, n_candidates], where n_candidates &gt;= k |
 | `indices` | out | `raft::device_matrix_view<int64_t, int64_t, raft::row_major>` | device matrix that stores the refined indices [n_queries, k] |
 | `distances` | out | `raft::device_matrix_view<float, int64_t, raft::row_major>` | device matrix that stores the refined distances [n_queries, k] |
-| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use. Euclidean (L2) is used by default Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
+| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use. Euclidean (L2) is used by default<br />Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
 
 **Returns**
 
@@ -219,7 +219,7 @@ Example usage
 | `neighbor_candidates` | in | `raft::host_matrix_view<const int64_t, int64_t, raft::row_major>` | host matrix with indices of candidate vectors [n_queries, n_candidates], where n_candidates &gt;= k |
 | `indices` | out | `raft::host_matrix_view<int64_t, int64_t, raft::row_major>` | host matrix that stores the refined indices [n_queries, k] |
 | `distances` | out | `raft::host_matrix_view<float, int64_t, raft::row_major>` | host matrix that stores the refined distances [n_queries, k] |
-| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use. Euclidean (L2) is used by default Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
+| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use. Euclidean (L2) is used by default<br />Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
 
 **Returns**
 
@@ -255,7 +255,7 @@ Example usage
 | `neighbor_candidates` | in | `raft::host_matrix_view<const uint32_t, int64_t, raft::row_major>` | host matrix with indices of candidate vectors [n_queries, n_candidates], where n_candidates &gt;= k |
 | `indices` | out | `raft::host_matrix_view<uint32_t, int64_t, raft::row_major>` | host matrix that stores the refined indices [n_queries, k] |
 | `distances` | out | `raft::host_matrix_view<float, int64_t, raft::row_major>` | host matrix that stores the refined distances [n_queries, k] |
-| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use. Euclidean (L2) is used by default Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
+| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use. Euclidean (L2) is used by default<br />Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
 
 **Returns**
 
@@ -291,7 +291,7 @@ Example usage
 | `neighbor_candidates` | in | `raft::host_matrix_view<const int64_t, int64_t, raft::row_major>` | host matrix with indices of candidate vectors [n_queries, n_candidates], where n_candidates &gt;= k |
 | `indices` | out | `raft::host_matrix_view<int64_t, int64_t, raft::row_major>` | host matrix that stores the refined indices [n_queries, k] |
 | `distances` | out | `raft::host_matrix_view<float, int64_t, raft::row_major>` | host matrix that stores the refined distances [n_queries, k] |
-| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use. Euclidean (L2) is used by default Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
+| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use. Euclidean (L2) is used by default<br />Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
 
 **Returns**
 
@@ -327,7 +327,7 @@ Example usage
 | `neighbor_candidates` | in | `raft::host_matrix_view<const int64_t, int64_t, raft::row_major>` | host matrix with indices of candidate vectors [n_queries, n_candidates], where n_candidates &gt;= k |
 | `indices` | out | `raft::host_matrix_view<int64_t, int64_t, raft::row_major>` | host matrix that stores the refined indices [n_queries, k] |
 | `distances` | out | `raft::host_matrix_view<float, int64_t, raft::row_major>` | host matrix that stores the refined distances [n_queries, k] |
-| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use. Euclidean (L2) is used by default Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
+| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use. Euclidean (L2) is used by default<br />Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
 
 **Returns**
 
@@ -363,7 +363,7 @@ Example usage
 | `neighbor_candidates` | in | `raft::host_matrix_view<const int64_t, int64_t, raft::row_major>` | host matrix with indices of candidate vectors [n_queries, n_candidates], where n_candidates &gt;= k |
 | `indices` | out | `raft::host_matrix_view<int64_t, int64_t, raft::row_major>` | host matrix that stores the refined indices [n_queries, k] |
 | `distances` | out | `raft::host_matrix_view<float, int64_t, raft::row_major>` | host matrix that stores the refined distances [n_queries, k] |
-| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use. Euclidean (L2) is used by default Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
+| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | distance metric to use. Euclidean (L2) is used by default<br />Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
 
 **Returns**
 
diff --git a/fern/pages/cpp_api/cpp-api-neighbors-vamana.md b/fern/pages/cpp_api/cpp-api-neighbors-vamana.md
index 039d33a2cd..f33e889ae2 100644
--- a/fern/pages/cpp_api/cpp-api-neighbors-vamana.md
+++ b/fern/pages/cpp_api/cpp-api-neighbors-vamana.md
@@ -532,8 +532,8 @@ Matches the file format used by the DiskANN open-source repository, allowing cro
 | `handle` | in | `raft::resources const&` | the raft handle |
 | `file_prefix` | in | `const std::string&` | prefix of path and name of index files |
 | `index` | in | [`const cuvs::neighbors::vamana::index<float, uint32_t>&`](/api-reference/cpp-api-neighbors-vamana#neighbors-vamana-index) | Vamana index |
-| `include_dataset` | in | `bool` | whether or not to serialize the dataset Default: `true`. |
-| `sector_aligned` | in | `bool` | whether output file should be aligned to disk sectors of 4096 bytes Default: `false`. |
+| `include_dataset` | in | `bool` | whether or not to serialize the dataset<br />Default: `true`. |
+| `sector_aligned` | in | `bool` | whether output file should be aligned to disk sectors of 4096 bytes<br />Default: `false`. |
 
 **Returns**
 
@@ -560,8 +560,8 @@ Matches the file format used by the DiskANN open-source repository, allowing cro
 | `handle` | in | `raft::resources const&` | the raft handle |
 | `file_prefix` | in | `const std::string&` | prefix of path and name of index files |
 | `index` | in | [`const cuvs::neighbors::vamana::index<int8_t, uint32_t>&`](/api-reference/cpp-api-neighbors-vamana#neighbors-vamana-index) | Vamana index |
-| `include_dataset` | in | `bool` | whether or not to serialize the dataset Default: `true`. |
-| `sector_aligned` | in | `bool` | whether output file should be aligned to disk sectors of 4096 bytes Default: `false`. |
+| `include_dataset` | in | `bool` | whether or not to serialize the dataset<br />Default: `true`. |
+| `sector_aligned` | in | `bool` | whether output file should be aligned to disk sectors of 4096 bytes<br />Default: `false`. |
 
 **Returns**
 
@@ -588,8 +588,8 @@ Matches the file format used by the DiskANN open-source repository, allowing cro
 | `handle` | in | `raft::resources const&` | the raft handle |
 | `file_prefix` | in | `const std::string&` | prefix of path and name of index files |
 | `index` | in | [`const cuvs::neighbors::vamana::index<uint8_t, uint32_t>&`](/api-reference/cpp-api-neighbors-vamana#neighbors-vamana-index) | Vamana index |
-| `include_dataset` | in | `bool` | whether or not to serialize the dataset Default: `true`. |
-| `sector_aligned` | in | `bool` | whether output file should be aligned to disk sectors of 4096 bytes Default: `false`. |
+| `include_dataset` | in | `bool` | whether or not to serialize the dataset<br />Default: `true`. |
+| `sector_aligned` | in | `bool` | whether output file should be aligned to disk sectors of 4096 bytes<br />Default: `false`. |
 
 **Returns**
 
diff --git a/fern/pages/cpp_api/cpp-api-preprocessing-pca.md b/fern/pages/cpp_api/cpp-api-preprocessing-pca.md
index 38600bc8b3..b360b3ea42 100644
--- a/fern/pages/cpp_api/cpp-api-preprocessing-pca.md
+++ b/fern/pages/cpp_api/cpp-api-preprocessing-pca.md
@@ -70,7 +70,7 @@ Computes the principal components, explained variances, singular values, and col
 | `singular_vals` | out | `raft::device_vector_view<float, int64_t>` | singular values [n_components] |
 | `mu` | out | `raft::device_vector_view<float, int64_t>` | column means [n_cols] |
 | `noise_vars` | out | `raft::device_scalar_view<float, int64_t>` | noise variance (scalar) |
-| `flip_signs_based_on_U` | in | `bool` | whether to determine signs by U (true) or V.T (false) Default: `false`. |
+| `flip_signs_based_on_U` | in | `bool` | whether to determine signs by U (true) or V.T (false)<br />Default: `false`. |
 
 **Returns**
 
@@ -111,7 +111,7 @@ Computes the principal components and transforms the input data into the eigensp
 | `singular_vals` | out | `raft::device_vector_view<float, int64_t>` | singular values [n_components] |
 | `mu` | out | `raft::device_vector_view<float, int64_t>` | column means [n_cols] |
 | `noise_vars` | out | `raft::device_scalar_view<float, int64_t>` | noise variance (scalar) |
-| `flip_signs_based_on_U` | in | `bool` | whether to determine signs by U (true) or V.T (false) Default: `false`. |
+| `flip_signs_based_on_U` | in | `bool` | whether to determine signs by U (true) or V.T (false)<br />Default: `false`. |
 
 **Returns**
 
diff --git a/fern/pages/cpp_api/cpp-api-preprocessing-quantize-pq.md b/fern/pages/cpp_api/cpp-api-preprocessing-quantize-pq.md
index b394ec08ce..85bcf86fbf 100644
--- a/fern/pages/cpp_api/cpp-api-preprocessing-quantize-pq.md
+++ b/fern/pages/cpp_api/cpp-api-preprocessing-quantize-pq.md
@@ -175,7 +175,7 @@ Usage example:
 | `quant` | in | [`const quantizer<float>&`](/api-reference/cpp-api-preprocessing-quantize-pq#preprocessing-quantize-pq-quantizer) | a product quantizer |
 | `dataset` | in | `raft::device_matrix_view<const float, int64_t>` | a row-major matrix view on device or host |
 | `codes_out` | out | `raft::device_matrix_view<uint8_t, int64_t>` | a row-major matrix view on device containing the PQ codes |
-| `vq_labels` | out | `std::optional<raft::device_vector_view<uint32_t, int64_t>>` | a vector view on device containing the VQ labels when VQ is used, optional Default: `std::nullopt`. |
+| `vq_labels` | out | `std::optional<raft::device_vector_view<uint32_t, int64_t>>` | a vector view on device containing the VQ labels when VQ is used, optional<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -248,7 +248,7 @@ std::optional<raft::device_vector_view<const uint32_t, int64_t>> vq_labels = std
 | `quant` | in | [`const quantizer<float>&`](/api-reference/cpp-api-preprocessing-quantize-pq#preprocessing-quantize-pq-quantizer) | a product quantizer |
 | `pq_codes` | in | `raft::device_matrix_view<const uint8_t, int64_t>` | a row-major matrix view on device containing the PQ codes |
 | `out` | out | `raft::device_matrix_view<float, int64_t>` | a row-major matrix view on device |
-| `vq_labels` | in | `std::optional<raft::device_vector_view<const uint32_t, int64_t>>` | a vector view on device containing the VQ labels when VQ is used, optional Default: `std::nullopt`. |
+| `vq_labels` | in | `std::optional<raft::device_vector_view<const uint32_t, int64_t>>` | a vector view on device containing the VQ labels when VQ is used, optional<br />Default: `std::nullopt`. |
 
 **Returns**
 
diff --git a/fern/pages/cpp_api/cpp-api-selection-select-k.md b/fern/pages/cpp_api/cpp-api-selection-select-k.md
index c72706b333..e4fa429f76 100644
--- a/fern/pages/cpp_api/cpp-api-selection-select-k.md
+++ b/fern/pages/cpp_api/cpp-api-selection-select-k.md
@@ -40,9 +40,9 @@ Example usage
 | `out_val` | out | `raft::device_matrix_view<float, int64_t, raft::row_major>` | output values [batch_size, k]; the k smallest/largest values from each row of the `in_val`. |
 | `out_idx` | out | `raft::device_matrix_view<int64_t, int64_t, raft::row_major>` | output payload (e.g. indices) [batch_size, k]; the payload selected together with `out_val`. |
 | `select_min` | in | `bool` | whether to select k smallest (true) or largest (false) keys. |
-| `sorted` | in | `bool` | whether to make sure selected pairs are sorted by value Default: `false`. |
-| `algo` | in | `SelectAlgo` | the selection algorithm to use Default: `SelectAlgo::kAuto`. |
-| `len_i` | in | `std::optional<raft::device_vector_view<const int64_t, int64_t>>` | optional array of size (batch_size) providing lengths for each individual row Default: `std::nullopt`. |
+| `sorted` | in | `bool` | whether to make sure selected pairs are sorted by value<br />Default: `false`. |
+| `algo` | in | `SelectAlgo` | the selection algorithm to use<br />Default: `SelectAlgo::kAuto`. |
+| `len_i` | in | `std::optional<raft::device_vector_view<const int64_t, int64_t>>` | optional array of size (batch_size) providing lengths for each individual row<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -79,9 +79,9 @@ Example usage
 | `out_val` | out | `raft::device_matrix_view<float, int64_t, raft::row_major>` | output values [batch_size, k]; the k smallest/largest values from each row of the `in_val`. |
 | `out_idx` | out | `raft::device_matrix_view<uint32_t, int64_t, raft::row_major>` | output payload (e.g. indices) [batch_size, k]; the payload selected together with `out_val`. |
 | `select_min` | in | `bool` | whether to select k smallest (true) or largest (false) keys. |
-| `sorted` | in | `bool` | whether to make sure selected pairs are sorted by value Default: `false`. |
-| `algo` | in | `SelectAlgo` | the selection algorithm to use Default: `SelectAlgo::kAuto`. |
-| `len_i` | in | `std::optional<raft::device_vector_view<const uint32_t, int64_t>>` | optional array of size (batch_size) providing lengths for each individual row Default: `std::nullopt`. |
+| `sorted` | in | `bool` | whether to make sure selected pairs are sorted by value<br />Default: `false`. |
+| `algo` | in | `SelectAlgo` | the selection algorithm to use<br />Default: `SelectAlgo::kAuto`. |
+| `len_i` | in | `std::optional<raft::device_vector_view<const uint32_t, int64_t>>` | optional array of size (batch_size) providing lengths for each individual row<br />Default: `std::nullopt`. |
 
 **Returns**
 
@@ -118,9 +118,9 @@ Example usage
 | `out_val` | out | `raft::device_matrix_view<half, int64_t, raft::row_major>` | output values [batch_size, k]; the k smallest/largest values from each row of the `in_val`. |
 | `out_idx` | out | `raft::device_matrix_view<uint32_t, int64_t, raft::row_major>` | output payload (e.g. indices) [batch_size, k]; the payload selected together with `out_val`. |
 | `select_min` | in | `bool` | whether to select k smallest (true) or largest (false) keys. |
-| `sorted` | in | `bool` | whether to make sure selected pairs are sorted by value Default: `false`. |
-| `algo` | in | `SelectAlgo` | the selection algorithm to use Default: `SelectAlgo::kAuto`. |
-| `len_i` | in | `std::optional<raft::device_vector_view<const uint32_t, int64_t>>` | optional array of size (batch_size) providing lengths for each individual row Default: `std::nullopt`. |
+| `sorted` | in | `bool` | whether to make sure selected pairs are sorted by value<br />Default: `false`. |
+| `algo` | in | `SelectAlgo` | the selection algorithm to use<br />Default: `SelectAlgo::kAuto`. |
+| `len_i` | in | `std::optional<raft::device_vector_view<const uint32_t, int64_t>>` | optional array of size (batch_size) providing lengths for each individual row<br />Default: `std::nullopt`. |
 
 **Returns**
 
diff --git a/fern/pages/cpp_api/cpp-api-stats-silhouette-score.md b/fern/pages/cpp_api/cpp-api-stats-silhouette-score.md
index 9c567c0bc2..2acb145919 100644
--- a/fern/pages/cpp_api/cpp-api-stats-silhouette-score.md
+++ b/fern/pages/cpp_api/cpp-api-stats-silhouette-score.md
@@ -32,7 +32,7 @@ cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded
 | `labels` | in | `raft::device_vector_view<const int, int64_t>` | the pointer to the array containing labels for every data sample (length: nRows) |
 | `silhouette_score_per_sample` | out | `std::optional<raft::device_vector_view<float, int64_t>>` | optional array populated with the silhouette score for every sample (length: nRows) |
 | `n_unique_labels` | in | `int64_t` | number of unique labels in the labels array |
-| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | Distance metric to use. Euclidean (L2) is used by default Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
+| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | Distance metric to use. Euclidean (L2) is used by default<br />Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
 
 **Returns**
 
@@ -66,7 +66,7 @@ cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded
 | `silhouette_score_per_sample` | out | `std::optional<raft::device_vector_view<float, int64_t>>` | optional array populated with the silhouette score for every sample (length: nRows) |
 | `n_unique_labels` | in | `int64_t` | number of unique labels in the labels array |
 | `batch_size` | in | `int64_t` | number of samples per batch |
-| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | the numerical value that maps to the type of distance metric to be used in the calculations Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
+| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | the numerical value that maps to the type of distance metric to be used in the calculations<br />Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
 
 **Returns**
 
@@ -97,7 +97,7 @@ cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded
 | `labels` | in | `raft::device_vector_view<const int, int64_t>` | the pointer to the array containing labels for every data sample (length: nRows) |
 | `silhouette_score_per_sample` | out | `std::optional<raft::device_vector_view<double, int64_t>>` | optional array populated with the silhouette score for every sample (length: nRows) |
 | `n_unique_labels` | in | `int64_t` | number of unique labels in the labels array |
-| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | the numerical value that maps to the type of distance metric to be used in the calculations Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
+| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | the numerical value that maps to the type of distance metric to be used in the calculations<br />Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
 
 **Returns**
 
@@ -130,7 +130,7 @@ cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded
 | `silhouette_score_per_sample` | out | `std::optional<raft::device_vector_view<double, int64_t>>` | optional array populated with the silhouette score for every sample (length: nRows) |
 | `n_unique_labels` | in | `int64_t` | number of unique labels in the labels array |
 | `batch_size` | in | `int64_t` | number of samples per batch |
-| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | the numerical value that maps to the type of distance metric to be used in the calculations Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
+| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | the numerical value that maps to the type of distance metric to be used in the calculations<br />Default: `cuvs::distance::DistanceType::L2Unexpanded`. |
 
 **Returns**
 
diff --git a/fern/pages/cpp_api/cpp-api-stats-trustworthiness-score.md b/fern/pages/cpp_api/cpp-api-stats-trustworthiness-score.md
index ea0c828a82..039b869142 100644
--- a/fern/pages/cpp_api/cpp-api-stats-trustworthiness-score.md
+++ b/fern/pages/cpp_api/cpp-api-stats-trustworthiness-score.md
@@ -33,8 +33,8 @@ int batch_size                      = 512);
 | `X` | in | `raft::device_matrix_view<const float, int64_t, raft::row_major>` | Data in original dimension |
 | `X_embedded` | in | `raft::device_matrix_view<const float, int64_t, raft::row_major>` | Data in target dimension (embedding) |
 | `n_neighbors` | in | `int` | Number of neighbors considered by trustworthiness score |
-| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | Distance metric to use. Euclidean (L2) is used by default Default: `cuvs::distance::DistanceType::L2SqrtUnexpanded`. |
-| `batch_size` | in | `int` | Batch size Default: `512`. |
+| `metric` | in | [`cuvs::distance::DistanceType`](/api-reference/cpp-api-distance-distance#distance-distancetype) | Distance metric to use. Euclidean (L2) is used by default<br />Default: `cuvs::distance::DistanceType::L2SqrtUnexpanded`. |
+| `batch_size` | in | `int` | Batch size<br />Default: `512`. |
 
 **Returns**
 
diff --git a/fern/pages/advanced_topics.md b/fern/pages/field_guide.md
similarity index 100%
rename from fern/pages/advanced_topics.md
rename to fern/pages/field_guide.md
diff --git a/fern/pages/jit_compilation.md b/fern/pages/jit_compilation.md
index 32ed507559..b480bb7d5f 100644
--- a/fern/pages/jit_compilation.md
+++ b/fern/pages/jit_compilation.md
@@ -20,14 +20,6 @@ The following public NVIDIA cuVS C++ APIs currently trigger JIT compilation. The
 - [cuvs::neighbors::ivf_pq::search()](/api-reference/cpp-api-neighbors-ivf-pq)
 - [cuvs::neighbors::ivf_sq::search()](/api-reference/cpp-api-neighbors-ivf-sq)
 
-The following C++ APIs can also trigger JIT compilation when they call one of the search paths above internally:
-
-- [cuvs::neighbors::cagra::build()](/api-reference/cpp-api-neighbors-cagra) when graph construction uses `graph_build_params::ivf_pq_params` or `graph_build_params::iterative_search_params`
-- [cuvs::neighbors::cagra::extend()](/api-reference/cpp-api-neighbors-cagra) when adding nodes, because the extension path searches the existing CAGRA graph
-- [cuvs::neighbors::composite::composite_index::search()](/api-reference/cpp-api-neighbors-composite-index) when the composite index searches its CAGRA child indexes
-- [cuvs::neighbors::tiered_index::search()](/api-reference/cpp-api-neighbors-tiered-index) when the tiered index is backed by CAGRA, IVF-Flat, or IVF-PQ
-- [cuvs::neighbors::all_neighbors::build()](/api-reference/cpp-api-neighbors-all-neighbors) when `graph_build_params` uses IVF-PQ
-
 Custom distance metrics (UDFs) for IVF-flat search also use JIT compilation. See [UDF Usage](/user-guide/field-guide/udf-usage).
 
 For implementation details on building JIT LTO kernel fragments and linking them at runtime, see [Link-time Optimization](/developer-guide/advanced-topics/link-time-optimization).
diff --git a/fern/pages/python_api/python-api-cluster-kmeans.md b/fern/pages/python_api/python-api-cluster-kmeans.md
index 0eea49dd72..076aee0505 100644
--- a/fern/pages/python_api/python-api-cluster-kmeans.md
+++ b/fern/pages/python_api/python-api-cluster-kmeans.md
@@ -208,7 +208,7 @@ reducing ``streaming_batch_size`` to reduce GPU memory usage.
 | `params` | `KMeansParams` | Parameters to use to fit KMeans model.  For host data, ``params.streaming_batch_size`` controls how many samples are sent to the GPU per batch. |
 | `X` | `array-like` | Training instances, shape (m, k).  Accepts both device arrays (cupy / CUDA array interface) and host arrays (numpy). |
 | `centroids` | `Optional writable CUDA array interface compliant matrix` | shape (n_clusters, k) |
-| `sample_weights` | `Optional weights per observation.  Must reside on` | the same memory space as X (device or host). default: None |
+| `sample_weights` | `optional` | Optional weights per observation. Must reside on the same memory space as X (device or host).<br />default: None |
 | `resources` | `cuvs.common.Resources, optional` |  |
 
 **Returns**
diff --git a/fern/pages/python_api/python-api-common.md b/fern/pages/python_api/python-api-common.md
index bb883a55a5..996cb250c0 100644
--- a/fern/pages/python_api/python-api-common.md
+++ b/fern/pages/python_api/python-api-common.md
@@ -98,7 +98,7 @@ all available GPUs.
 
 | Name | Type | Description |
 | --- | --- | --- |
-| `stream` | `int, optional` | A CUDA stream pointer to use for this resource handle. If None, a default stream will be used. |
+| `stream` | `int, optional` | A CUDA stream pointer to use for this resource handle. If None, a default stream will be used. |
 | `device_ids` | `list of int, optional` | A list of device IDs to use for multi-GPU operations. If None, all available GPUs will be used. |
 
 **Examples**
diff --git a/fern/pages/python_api/python-api-neighbors-all-neighbors.md b/fern/pages/python_api/python-api-neighbors-all-neighbors.md
index 3a01e9d685..93b2409cc1 100644
--- a/fern/pages/python_api/python-api-neighbors-all-neighbors.md
+++ b/fern/pages/python_api/python-api-neighbors-all-neighbors.md
@@ -18,7 +18,7 @@ Parameters for all-neighbors k-NN graph building.
 
 | Name | Type | Description |
 | --- | --- | --- |
-| `algo` | `str or cuvsAllNeighborsAlgo` | Algorithm to use for local k-NN graph building. Options: "brute_force", "ivf_pq", "nn_descent" |
+| `algo` | `str or cuvsAllNeighborsAlgo` | Algorithm to use for local k-NN graph building.<br />Options: "brute_force", "ivf_pq", "nn_descent" |
 | `overlap_factor` | `int, default=2` | Number of clusters each point is assigned to (must be &lt; n_clusters) |
 | `n_clusters` | `int, default=1` | Number of clusters/batches to partition the dataset into (&gt; overlap_factor). Use n_clusters&gt;1 to distribute the work across GPUs. |
 | `metric` | `str or cuvsDistanceType, default="sqeuclidean"` | Distance metric to use for graph construction |
diff --git a/fern/pages/python_api/python-api-neighbors-cagra.md b/fern/pages/python_api/python-api-neighbors-cagra.md
index fb1d2165ca..3f40197e7d 100644
--- a/fern/pages/python_api/python-api-neighbors-cagra.md
+++ b/fern/pages/python_api/python-api-neighbors-cagra.md
@@ -101,7 +101,7 @@ Parameters for VPQ Compression
 
 | Name | Type | Description |
 | --- | --- | --- |
-| `pq_bits` | `int` | The bit length of the vector element after compression by PQ. Possible values: [4, 5, 6, 7, 8]. The smaller the 'pq_bits', the smaller the index size and the better the search performance, but the lower the recall. |
+| `pq_bits` | `int` | The bit length of the vector element after compression by PQ.<br />Possible values: [4, 5, 6, 7, 8]. The smaller the 'pq_bits', the smaller the index size and the better the search performance, but the lower the recall. |
 | `pq_dim` | `int` | The dimensionality of the vector after compression by PQ. When zero, an optimal value is selected using a heuristic. |
 | `vq_n_centers` | `int` | Vector Quantization (VQ) codebook size - number of "coarse cluster centers". When zero, an optimal value is selected using a heuristic. |
 | `kmeans_n_iters` | `int` | The number of iterations searching for kmeans centers (both VQ & PQ phases). |
@@ -265,10 +265,10 @@ Parameters to build index for CAGRA nearest neighbor search
 
 | Name | Type | Description |
 | --- | --- | --- |
-| `metric` | `str, default = "sqeuclidean"` | String denoting the metric type, valid values for metric are ["sqeuclidean", "inner_product", "cosine"], where:<br /><br />- sqeuclidean is the euclidean distance without the square root operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2<br />- inner_product distance is defined as distance(a, b) = \\sum_i a_i * b_i.<br />- cosine distance is defined as distance(a, b) = 1 - \\sum_i a_i * b_i / ( \|\|a\|\|_2 * \|\|b\|\|_2). |
+| `metric` | `str, default = "sqeuclidean"` | String denoting the metric type,<br />valid values for metric are ["sqeuclidean", "inner_product", "cosine"], where:<br /><br />- sqeuclidean is the euclidean distance without the square root operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2<br />- inner_product distance is defined as distance(a, b) = \\sum_i a_i * b_i.<br />- cosine distance is defined as distance(a, b) = 1 - \\sum_i a_i * b_i / ( \|\|a\|\|_2 * \|\|b\|\|_2). |
 | `intermediate_graph_degree` | `int, default = 128` |  |
 | `graph_degree` | `int, default = 64` |  |
-| `build_algo` | `str, default = "ivf_pq"` | string denoting the graph building algorithm to use. Valid values for algo: ["ivf_pq", "nn_descent", "iterative_cagra_search", "ace"], where<br /><br />- ivf_pq will use the IVF-PQ algorithm for building the knn graph<br />- nn_descent (experimental) will use the NN-Descent algorithm for building the knn graph. It is expected to be generally faster than ivf_pq.<br />- iterative_cagra_search will iteratively build the knn graph using CAGRA's search() and optimize()<br />- ace will use ACE (Augmented Core Extraction) for building indices for datasets too large to fit in GPU memory |
+| `build_algo` | `str, default = "ivf_pq"` | string denoting the graph building algorithm to use.<br />Valid values for algo: ["ivf_pq", "nn_descent", "iterative_cagra_search", "ace"], where<br /><br />- ivf_pq will use the IVF-PQ algorithm for building the knn graph<br />- nn_descent (experimental) will use the NN-Descent algorithm for building the knn graph. It is expected to be generally faster than ivf_pq.<br />- iterative_cagra_search will iteratively build the knn graph using CAGRA's search() and optimize()<br />- ace will use ACE (Augmented Core Extraction) for building indices for datasets too large to fit in GPU memory |
 | `compression` | `CompressionParams, optional` | If compression is desired should be a CompressionParams object. If None compression will be disabled. |
 | `ivf_pq_build_params` | `cuvs.neighbors.ivf_pq.IndexParams, optional` | Parameters for IVF-PQ algorithm. If provided, it will be used for building the graph. |
 | `ivf_pq_search_params` | `cuvs.neighbors.ivf_pq.SearchParams, optional` | Parameters for IVF-PQ search. If provided, it will be used for searching the graph. |
@@ -350,12 +350,12 @@ CAGRA search parameters
 | `max_queries` | `int, default = 0` | Maximum number of queries to search at the same time (batch size). Auto select when 0. |
 | `itopk_size` | `int, default = 64` | Number of intermediate search results retained during the search. This is the main knob to adjust trade off between accuracy and search speed. Higher values improve the search accuracy. |
 | `max_iterations` | `int, default = 0` | Upper limit of search iterations. Auto select when 0. |
-| `algo` | `str, default = "auto"` | String denoting the search algorithm to use Valid values for algo: ["auto", "single_cta", "multi_cta"], where:<br /><br />- auto will automatically select the best value based on query size<br />- single_cta is better when query contains larger number of vectors (e.g &gt;10)<br />- multi_cta is better when query contains only a few vectors |
+| `algo` | `str, default = "auto"` | String denoting the search algorithm to use<br />Valid values for algo: ["auto", "single_cta", "multi_cta"], where:<br /><br />- auto will automatically select the best value based on query size<br />- single_cta is better when query contains larger number of vectors (e.g &gt;10)<br />- multi_cta is better when query contains only a few vectors |
 | `team_size` | `int, default = 0` | Number of threads used to calculate a single distance. 4, 8, 16, or 32. |
 | `search_width` | `int, default = 1` | Number of graph nodes to select as the starting point for the search in each iteration. |
 | `min_iterations` | `int, default = 0` | Lower limit of search iterations. |
 | `thread_block_size` | `int, default = 0` | Thread block size. 0, 64, 128, 256, 512, 1024. Auto selection when 0. |
-| `hashmap_mode` | `str, default = "auto"` | String denoting the type of hash map to use. It's usually better to allow the algorithm to select this value, Valid values for hashmap_mode: ["auto", "small", "hash"], where:<br /><br />- auto will automatically select the best value based on algo<br />- small will use the small shared memory hash table with resetting.<br />- hash will use a single hash table in global memory. |
+| `hashmap_mode` | `str, default = "auto"` | String denoting the type of hash map to use. It's usually better to allow the algorithm to select this value.<br />Valid values for hashmap_mode: ["auto", "small", "hash"], where:<br /><br />- auto will automatically select the best value based on algo<br />- small will use the small shared memory hash table with resetting.<br />- hash will use a single hash table in global memory. |
 | `hashmap_min_bitlen` | `int, default = 0` | Upper limit of hashmap fill rate. More than 0.1, less than 0.9. |
 | `hashmap_max_fill_rate` | `float, default = 0.5` | Upper limit of hashmap fill rate. More than 0.1, less than 0.9. |
 | `num_random_samplings` | `int, default = 1` | Number of iterations of initial random seed node selection. 1 or more. |
diff --git a/fern/pages/python_api/python-api-neighbors-hnsw.md b/fern/pages/python_api/python-api-neighbors-hnsw.md
index 071b1fc95c..a27d58918e 100644
--- a/fern/pages/python_api/python-api-neighbors-hnsw.md
+++ b/fern/pages/python_api/python-api-neighbors-hnsw.md
@@ -86,11 +86,11 @@ Parameters to build index for HNSW nearest neighbor search
 
 | Name | Type | Description |
 | --- | --- | --- |
-| `hierarchy` | `string, default = "gpu" (optional)` | The hierarchy of the HNSW index. Valid values are ["none", "cpu", "gpu"].<br />- "none": No hierarchy is built.<br />- "cpu": Hierarchy is built using CPU.<br />- "gpu": Hierarchy is built using GPU. |
+| `hierarchy` | `string, default = "gpu" (optional)` | The hierarchy of the HNSW index.<br />Valid values are ["none", "cpu", "gpu"].<br />- "none": No hierarchy is built.<br />- "cpu": Hierarchy is built using CPU.<br />- "gpu": Hierarchy is built using GPU. |
 | `ef_construction` | `int, default = 200 (optional)` | Maximum number of candidate list size used during construction when hierarchy is `cpu`. |
-| `num_threads` | `int, default = 0 (optional)` | Number of CPU threads used to increase construction parallelism when hierarchy is `cpu` or `gpu`. When the value is 0, the number of threads is automatically determined to the maximum number of threads available. NOTE: When hierarchy is `gpu`, while the majority of the work is done on the GPU, initialization of the HNSW index itself and some other work is parallelized with the help of CPU threads. |
+| `num_threads` | `int, default = 0 (optional)` | Number of CPU threads used to increase construction parallelism when hierarchy is `cpu` or `gpu`. When the value is 0, the number of threads is automatically determined to the maximum number of threads available.<br />NOTE: When hierarchy is `gpu`, while the majority of the work is done on the GPU, initialization of the HNSW index itself and some other work is parallelized with the help of CPU threads. |
 | `M` | `int, default = 32 (optional)` | HNSW M parameter: number of bi-directional links per node (used when building with ACE). graph_degree = m * 2, intermediate_graph_degree = m * 3. |
-| `metric` | `string, default = "sqeuclidean" (optional)` | Distance metric to use. Valid values: ["sqeuclidean", "inner_product"] |
+| `metric` | `string, default = "sqeuclidean" (optional)` | Distance metric to use.<br />Valid values: ["sqeuclidean", "inner_product"] |
 | `ace_params` | `AceParams, default = None (optional)` | ACE parameters for building HNSW index using ACE algorithm. If set, enables the build() function to use ACE for index construction. |
 
 **Constructor**
diff --git a/fern/pages/python_api/python-api-neighbors-ivf-flat.md b/fern/pages/python_api/python-api-neighbors-ivf-flat.md
index 79d2a23b9d..a4fd37e95c 100644
--- a/fern/pages/python_api/python-api-neighbors-ivf-flat.md
+++ b/fern/pages/python_api/python-api-neighbors-ivf-flat.md
@@ -68,7 +68,7 @@ Parameters to build index for IvfFlat nearest neighbor search
 | Name | Type | Description |
 | --- | --- | --- |
 | `n_lists` | `int, default = 1024` | The number of clusters used in the coarse quantizer. |
-| `metric` | `str, default = "sqeuclidean"` | String denoting the metric type. Valid values for metric: ["sqeuclidean", "inner_product", "euclidean", "cosine"], where<br /><br />- sqeuclidean is the euclidean distance without the square root operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2,<br />- euclidean is the euclidean distance<br />- inner product distance is defined as distance(a, b) = \\sum_i a_i * b_i.<br />- cosine distance is defined as distance(a, b) = 1 - \\sum_i a_i * b_i / ( \|\|a\|\|_2 * \|\|b\|\|_2). |
+| `metric` | `str, default = "sqeuclidean"` | String denoting the metric type.<br />Valid values for metric: ["sqeuclidean", "inner_product", "euclidean", "cosine"], where<br /><br />- sqeuclidean is the euclidean distance without the square root operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2,<br />- euclidean is the euclidean distance<br />- inner product distance is defined as distance(a, b) = \\sum_i a_i * b_i.<br />- cosine distance is defined as distance(a, b) = 1 - \\sum_i a_i * b_i / ( \|\|a\|\|_2 * \|\|b\|\|_2). |
 | `kmeans_n_iters` | `int, default = 20` | The number of iterations searching for kmeans centers during index building. The default setting is often fine, but this parameter can be decreased to improve training time wih larger trainset fractions (10M+ vectors) or increased for smaller trainset fractions (very small number of vectors) to improve recall. |
 | `kmeans_trainset_fraction` | `int, default = 0.5` | If kmeans_trainset_fraction is less than 1, then the dataset is subsampled, and only n_samples * kmeans_trainset_fraction rows are used for training. |
 | `add_data_on_build` | `bool, default = True` | After training the coarse and fine quantizers, we will populate the index with the dataset if add_data_on_build == True, otherwise the index is left empty, and the extend method can be used to add new vectors to the index. |
diff --git a/fern/pages/python_api/python-api-neighbors-ivf-pq.md b/fern/pages/python_api/python-api-neighbors-ivf-pq.md
index b734e61daa..b732a86972 100644
--- a/fern/pages/python_api/python-api-neighbors-ivf-pq.md
+++ b/fern/pages/python_api/python-api-neighbors-ivf-pq.md
@@ -198,17 +198,17 @@ Parameters to build index for IvfPq nearest neighbor search
 | Name | Type | Description |
 | --- | --- | --- |
 | `n_lists` | `int, default = 1024` | The number of clusters used in the coarse quantizer. |
-| `metric` | `str, default="sqeuclidean"` | String denoting the metric type. Valid values for metric: ["sqeuclidean", "inner_product", "euclidean", "cosine"], where:<br /><br />- sqeuclidean is the euclidean distance without the square root operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2,<br />- euclidean is the euclidean distance<br />- inner product distance is defined as distance(a, b) = \\sum_i a_i * b_i.<br />- cosine distance is defined as distance(a, b) = 1 - \\sum_i a_i * b_i / ( \|\|a\|\|_2 * \|\|b\|\|_2). |
+| `metric` | `str, default="sqeuclidean"` | String denoting the metric type.<br />Valid values for metric: ["sqeuclidean", "inner_product", "euclidean", "cosine"], where:<br /><br />- sqeuclidean is the euclidean distance without the square root operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2,<br />- euclidean is the euclidean distance<br />- inner product distance is defined as distance(a, b) = \\sum_i a_i * b_i.<br />- cosine distance is defined as distance(a, b) = 1 - \\sum_i a_i * b_i / ( \|\|a\|\|_2 * \|\|b\|\|_2). |
 | `kmeans_n_iters` | `int, default = 20` | The number of iterations searching for kmeans centers during index building. |
 | `kmeans_trainset_fraction` | `int, default = 0.5` | If kmeans_trainset_fraction is less than 1, then the dataset is subsampled, and only n_samples * kmeans_trainset_fraction rows are used for training. |
 | `pq_bits` | `int, default = 8` | The bit length of the vector element after quantization. |
-| `pq_dim` | `int, default = 0` | The dimensionality of a the vector after product quantization. When zero, an optimal value is selected using a heuristic. Note pq_dim * pq_bits must be a multiple of 8. Hint: a smaller 'pq_dim' results in a smaller index size and better search performance, but lower recall. If 'pq_bits' is 8, 'pq_dim' can be set to any number, but multiple of 8 are desirable for good performance. If 'pq_bits' is not 8, 'pq_dim' should be a multiple of 8. For good performance, it is desirable that 'pq_dim' is a multiple of 32. Ideally, 'pq_dim' should be also a divisor of the dataset dim. |
+| `pq_dim` | `int, default = 0` | The dimensionality of the vector after product quantization. When zero, an optimal value is selected using a heuristic. Note pq_dim * pq_bits must be a multiple of 8.<br />Hint: a smaller 'pq_dim' results in a smaller index size and better search performance, but lower recall. If 'pq_bits' is 8, 'pq_dim' can be set to any number, but multiples of 8 are desirable for good performance. If 'pq_bits' is not 8, 'pq_dim' should be a multiple of 8. For good performance, it is desirable that 'pq_dim' is a multiple of 32. Ideally, 'pq_dim' should also be a divisor of the dataset dim. |
 | `codebook_kind` | `string, default = "subspace"` | Valid values ["subspace", "cluster"] |
-| `force_random_rotation` | `bool, default = False` | Apply a random rotation matrix on the input data and queries even if `dim % pq_dim == 0`. Note: if `dim` is not multiple of `pq_dim`, a random rotation is always applied to the input data and queries to transform the working space from `dim` to `rot_dim`, which may be slightly larger than the original space and and is a multiple of `pq_dim` (`rot_dim % pq_dim == 0`). However, this transform is not necessary when `dim` is multiple of `pq_dim` (`dim == rot_dim`, hence no need in adding "extra" data columns / features). By default, if `dim == rot_dim`, the rotation transform is initialized with the identity matrix. When `force_random_rotation == True`, a random orthogonal transform matrix is generated regardless of the values of `dim` and `pq_dim`. |
+| `force_random_rotation` | `bool, default = False` | Apply a random rotation matrix on the input data and queries even if `dim % pq_dim == 0`.<br />Note: if `dim` is not a multiple of `pq_dim`, a random rotation is always applied to the input data and queries to transform the working space from `dim` to `rot_dim`, which may be slightly larger than the original space and is a multiple of `pq_dim` (`rot_dim % pq_dim == 0`). However, this transform is not necessary when `dim` is a multiple of `pq_dim` (`dim == rot_dim`, hence no need to add "extra" data columns / features). By default, if `dim == rot_dim`, the rotation transform is initialized with the identity matrix. When `force_random_rotation == True`, a random orthogonal transform matrix is generated regardless of the values of `dim` and `pq_dim`. |
 | `add_data_on_build` | `bool, default = True` | After training the coarse and fine quantizers, we will populate the index with the dataset if add_data_on_build == True, otherwise the index is left empty, and the extend method can be used to add new vectors to the index. |
 | `conservative_memory_allocation` | `bool, default = True` | By default, the algorithm allocates more space than necessary for individual clusters (`list_data`). This allows to amortize the cost of memory allocation and reduce the number of data copies during repeated calls to `extend` (extending the database). To disable this behavior and use as little GPU memory for the database as possible, set this flat to `True`. |
 | `max_train_points_per_pq_code` | `int, default = 256` | The max number of data points to use per PQ code during PQ codebook training. Using more data points per PQ code may increase the quality of PQ codebook but may also increase the build time. The parameter is applied to both PQ codebook generation methods, i.e., PER_SUBSPACE and PER_CLUSTER. In both cases, we will use pq_book_size * max_train_points_per_pq_code training points to train each codebook. |
-| `codes_layout` | `string, default = "interleaved"` | Memory layout of the IVF-PQ list data. Valid values ["flat", "interleaved"]<br /><br />- flat: Codes are stored contiguously, one vector's codes after another.<br />- interleaved: Codes are interleaved for optimized search performance. This is the default and recommended for search workloads. |
+| `codes_layout` | `string, default = "interleaved"` | Memory layout of the IVF-PQ list data.<br />Valid values ["flat", "interleaved"]<br /><br />- flat: Codes are stored contiguously, one vector's codes after another.<br />- interleaved: Codes are interleaved for optimized search performance. This is the default and recommended for search workloads. |
 
 **Constructor**
 
@@ -346,9 +346,9 @@ Supplemental parameters to search IVF-Pq index
 | Name | Type | Description |
 | --- | --- | --- |
 | `n_probes` | `int` | The number of clusters to search. |
-| `lut_dtype` | `default = np.float32` | Data type of look up table to be created dynamically at search time. The use of low-precision types reduces the amount of shared memory required at search time, so fast shared memory kernels can be used even for datasets with large dimansionality. Note that the recall is slightly degraded when low-precision type is selected. Possible values [np.float32, np.float16, np.uint8] |
-| `internal_distance_dtype` | `default = np.float32` | Storage data type for distance/similarity computation. Possible values [np.float32, np.float16] |
-| `coarse_search_dtype` | `default = np.float32` | [Experimental] The data type to use as the GEMM element type when searching the clusters to probe. Possible values: [np.float32, np.float16, np.int8].<br />- Legacy default: np.float32<br />- Recommended for performance: np.float16 (half)<br />- Experimental/low-precision: np.int8 |
+| `lut_dtype` | `default = np.float32` | Data type of the lookup table to be created dynamically at search time. The use of low-precision types reduces the amount of shared memory required at search time, so fast shared memory kernels can be used even for datasets with large dimensionality. Note that the recall is slightly degraded when a low-precision type is selected.<br />Possible values [np.float32, np.float16, np.uint8] |
+| `internal_distance_dtype` | `default = np.float32` | Storage data type for distance/similarity computation.<br />Possible values [np.float32, np.float16] |
+| `coarse_search_dtype` | `default = np.float32` | [Experimental] The data type to use as the GEMM element type when searching the clusters to probe.<br />Possible values: [np.float32, np.float16, np.int8].<br />- Legacy default: np.float32<br />- Recommended for performance: np.float16 (half)<br />- Experimental/low-precision: np.int8 |
 | `max_internal_batch_size` | `default = 4096` | Set the internal batch size to improve GPU utilization at the cost of larger memory footprint. |
 
 **Constructor**
diff --git a/fern/pages/python_api/python-api-neighbors-ivf-sq.md b/fern/pages/python_api/python-api-neighbors-ivf-sq.md
index 81ef078a08..5b158e77a8 100644
--- a/fern/pages/python_api/python-api-neighbors-ivf-sq.md
+++ b/fern/pages/python_api/python-api-neighbors-ivf-sq.md
@@ -63,15 +63,19 @@ cdef class IndexParams
 
 Parameters to build index for IvfSq nearest neighbor search
 
+Note: IVF-SQ currently uses fixed 8-bit residual scalar quantization.
+There are no additional SQ-specific tuning knobs.
+
 **Parameters**
 
 | Name | Type | Description |
 | --- | --- | --- |
 | `n_lists` | `int, default = 1024` | The number of clusters used in the coarse quantizer. |
-| `metric` | `str, default = "sqeuclidean"` | String denoting the metric type. Valid values for metric: ["sqeuclidean", "inner_product", "euclidean", "cosine"], where<br /><br />- sqeuclidean is the euclidean distance without the square root operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2,<br />- euclidean is the euclidean distance<br />- inner product distance is defined as distance(a, b) = \\sum_i a_i * b_i.<br />- cosine distance is defined as distance(a, b) = 1 - \\sum_i a_i * b_i / ( \|\|a\|\|_2 * \|\|b\|\|_2). |
+| `metric` | `str, default = "sqeuclidean"` | String denoting the metric type.<br />Valid values for metric: ["sqeuclidean", "inner_product", "euclidean", "cosine"], where<br /><br />- sqeuclidean is the euclidean distance without the square root operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2,<br />- euclidean is the euclidean distance<br />- inner product distance is defined as distance(a, b) = \\sum_i a_i * b_i.<br />- cosine distance is defined as distance(a, b) = 1 - \\sum_i a_i * b_i / ( \|\|a\|\|_2 * \|\|b\|\|_2). |
+| `metric_arg` | `float, default = 2.0` | Additional metric argument forwarded to cuVS distance computations. |
 | `kmeans_n_iters` | `int, default = 20` | The number of iterations searching for kmeans centers during index building. |
 | `max_train_points_per_cluster` | `int, default = 256` | The number of data vectors per cluster to use during iterative kmeans building. The index uses at most n_lists * max_train_points_per_cluster rows for training. |
-| `add_data_on_build` | `bool, default = True` | After training the coarse and fine quantizers, we will populate the index with the dataset if add_data_on_build == True, otherwise the index is left empty, and the extend method can be used to add new vectors to the index. |
+| `add_data_on_build` | `bool, default = True` | After training the coarse clustering model and residual scalar quantization parameters, we populate the index with the dataset if add_data_on_build == True. Otherwise, the index is left empty, and the extend method can be used to add new vectors to the index. |
 | `conservative_memory_allocation` | `bool, default = False` | By default, the algorithm allocates more space than necessary for individual clusters (`list_data`). This allows to amortize the cost of memory allocation and reduce the number of data copies during repeated calls to `extend` (extending the database). To disable this behavior and use as little GPU memory for the database as possible, set this flag to `True`. |
 
 **Constructor**
@@ -190,10 +194,12 @@ def build(IndexParams index_params, dataset, resources=None)
 
 Build the IvfSq index from the dataset for efficient search.
 
-IVF-SQ (Scalar Quantization) combines an IVF coarse quantizer with
+IVF-SQ (Scalar Quantization) uses IVF partitioning together with
 per-dimension scalar quantization. Each vector's residual is encoded
-as one byte per dimension, providing ~4x memory reduction vs IVF-Flat
-with higher recall than IVF-PQ at similar memory budgets.
+as one byte per dimension, which can reduce vector-storage memory by
+about 4x vs IVF-Flat for float32 inputs (about 2x for float16 inputs),
+excluding IVF structural overhead. Recall and speed trade-offs versus
+IVF-PQ are dataset and tuning dependent.
 
 **Parameters**
 
@@ -313,7 +319,7 @@ version of cuvs is not guaranteed to work.
 `@auto_sync_resources`
 
 ```python
-def save(filename, Index index, bool include_dataset=True, resources=None)
+def save(filename, Index index, resources=None)
 ```
 
 Saves the index to a file.
diff --git a/fern/pages/python_api/python-api-neighbors-mg-cagra.md b/fern/pages/python_api/python-api-neighbors-mg-cagra.md
index 1ce9f7655b..dda3c00601 100644
--- a/fern/pages/python_api/python-api-neighbors-mg-cagra.md
+++ b/fern/pages/python_api/python-api-neighbors-mg-cagra.md
@@ -41,7 +41,7 @@ Extends single-GPU IndexParams with multi-GPU specific parameters.
 
 | Name | Type | Description |
 | --- | --- | --- |
-| `distribution_mode` | `str, default = "sharded"` | Distribution mode for multi-GPU setup. Valid values: ["replicated", "sharded"] |
+| `distribution_mode` | `str, default = "sharded"` | Distribution mode for multi-GPU setup.<br />Valid values: ["replicated", "sharded"] |
 | `**kwargs` | `Additional parameters passed to single-GPU IndexParams` |  |
 
 **Note**
diff --git a/fern/pages/python_api/python-api-neighbors-mg-ivf-flat.md b/fern/pages/python_api/python-api-neighbors-mg-ivf-flat.md
index 123b13f80f..99c3f8adac 100644
--- a/fern/pages/python_api/python-api-neighbors-mg-ivf-flat.md
+++ b/fern/pages/python_api/python-api-neighbors-mg-ivf-flat.md
@@ -41,7 +41,7 @@ Extends single-GPU IndexParams with multi-GPU specific parameters.
 
 | Name | Type | Description |
 | --- | --- | --- |
-| `distribution_mode` | `str, default = "sharded"` | Distribution mode for multi-GPU setup. Valid values: ["replicated", "sharded"] |
+| `distribution_mode` | `str, default = "sharded"` | Distribution mode for multi-GPU setup.<br />Valid values: ["replicated", "sharded"] |
 | `**kwargs` | `Additional parameters passed to single-GPU IndexParams` |  |
 
 **Constructor**
diff --git a/fern/pages/python_api/python-api-neighbors-mg-ivf-pq.md b/fern/pages/python_api/python-api-neighbors-mg-ivf-pq.md
index 0e9edbf48f..7d84e3fa79 100644
--- a/fern/pages/python_api/python-api-neighbors-mg-ivf-pq.md
+++ b/fern/pages/python_api/python-api-neighbors-mg-ivf-pq.md
@@ -41,7 +41,7 @@ Extends single-GPU IndexParams with multi-GPU specific parameters.
 
 | Name | Type | Description |
 | --- | --- | --- |
-| `distribution_mode` | `str, default = "sharded"` | Distribution mode for multi-GPU setup. Valid values: ["replicated", "sharded"] |
+| `distribution_mode` | `str, default = "sharded"` | Distribution mode for multi-GPU setup.<br />Valid values: ["replicated", "sharded"] |
 | `**kwargs` | `Additional parameters passed to single-GPU IndexParams` |  |
 
 **Constructor**
diff --git a/fern/pages/python_api/python-api-neighbors-tiered-index.md b/fern/pages/python_api/python-api-neighbors-tiered-index.md
index cd91cf4d19..b2ed2e851a 100644
--- a/fern/pages/python_api/python-api-neighbors-tiered-index.md
+++ b/fern/pages/python_api/python-api-neighbors-tiered-index.md
@@ -38,7 +38,7 @@ Parameters to build index for Tiered Index nearest neighbor search
 
 | Name | Type | Description |
 | --- | --- | --- |
-| `metric` | `str, default = "sqeuclidean"` | String denoting the metric type. Valid values for metric: ["sqeuclidean", "inner_product", "euclidean", "cosine"], where<br />- sqeuclidean is the euclidean distance without the square root operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2,<br />- euclidean is the euclidean distance<br />- inner product distance is defined as distance(a, b) = \\sum_i a_i * b_i.<br />- cosine distance is defined as distance(a, b) = 1 - \\sum_i a_i * b_i / ( \|\|a\|\|_2 * \|\|b\|\|_2). |
+| `metric` | `str, default = "sqeuclidean"` | String denoting the metric type.<br />Valid values for metric: ["sqeuclidean", "inner_product", "euclidean", "cosine"], where<br />- sqeuclidean is the euclidean distance without the square root operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2,<br />- euclidean is the euclidean distance<br />- inner product distance is defined as distance(a, b) = \\sum_i a_i * b_i.<br />- cosine distance is defined as distance(a, b) = 1 - \\sum_i a_i * b_i / ( \|\|a\|\|_2 * \|\|b\|\|_2). |
 | `algo` | `str, default = "cagra"` | The algorithm to use for the ANN portion of the tiered index |
 | `upstream_params` | `object, optional` | The IndexParams for the upstream ANN object to use (ie the Cagra IndexParams for cagra etc) |
 | `min_ann_rows` | `int` | The minimum number of rows necessary to create an ann index |
diff --git a/fern/pages/python_api/python-api-preprocessing-quantize-pq.md b/fern/pages/python_api/python-api-preprocessing-quantize-pq.md
index e3fc90764f..22bcf5dd5c 100644
--- a/fern/pages/python_api/python-api-preprocessing-quantize-pq.md
+++ b/fern/pages/python_api/python-api-preprocessing-quantize-pq.md
@@ -82,13 +82,13 @@ Parameters for product quantization
 
 | Name | Type | Description |
 | --- | --- | --- |
-| `pq_bits` | `int` | specifies the bit length of the vector element after compression by PQ possible values: within [4, 16] |
+| `pq_bits` | `int` | specifies the bit length of the vector element after compression by PQ<br />possible values: within [4, 16] |
 | `pq_dim` | `int` | specifies the dimensionality of the vector after compression by PQ |
 | `use_subspaces` | `bool` | specifies whether to use subspaces for product quantization (PQ). When true, one PQ codebook is used for each subspace. Otherwise, a single PQ codebook is used. |
 | `use_vq` | `bool` | specifies whether to use Vector Quantization (KMeans) before product quantization (PQ). |
 | `vq_n_centers` | `int` | specifies the number of centers for the vector quantizer. When zero, an optimal value is selected using a heuristic. When one, only product quantization is used. |
 | `kmeans_n_iters` | `int` | specifies the number of iterations searching for kmeans centers |
-| `pq_kmeans_type` | `str` | specifies the type of kmeans algorithm to use for PQ training possible values: "kmeans", "kmeans_balanced" |
+| `pq_kmeans_type` | `str` | specifies the type of kmeans algorithm to use for PQ training<br />possible values: "kmeans", "kmeans_balanced" |
 | `max_train_points_per_pq_code` | `int` | specifies the max number of data points to use per PQ code during PQ codebook training. Using more data points per PQ code may increase the quality of PQ codebook but may also increase the build time. |
 | `max_train_points_per_vq_cluster` | `int` | specifies the max number of data points to use per VQ cluster. |
 
diff --git a/fern/pages/user_guide.md b/fern/pages/user_guide.md
index 9677cda564..6f35bfea71 100644
--- a/fern/pages/user_guide.md
+++ b/fern/pages/user_guide.md
@@ -6,68 +6,9 @@ slug: user-guide
 
 Use these guides when you are ready to apply NVIDIA cuVS APIs, benchmark algorithms, or integrate NVIDIA cuVS into a larger product.
 
-## API Guide
+## Guides
 
-- [API Guide](/user-guide/api-guides): find task-focused NVIDIA cuVS API examples for clustering, vector indexing, preprocessing, and supporting routines.
-
-### Common Types
-
-- [Common Types](/user-guide/api-guides/core-types): learn the shared array, memory, and multi-GPU resource abstractions used by NVIDIA cuVS APIs.
-- [Array Types](/user-guide/api-guides/core-types/array-types): choose between dense arrays and sparse arrays for NVIDIA cuVS APIs.
-- [Dense Arrays](/user-guide/api-guides/core-types/array-types/dense-arrays): pass dense vectors, matrices, and outputs into NVIDIA cuVS APIs across supported languages.
-- [Memory Management](/user-guide/api-guides/core-types/memory-management): configure RMM device, pool, pinned host, host, and managed memory resources for NVIDIA cuVS workflows.
-- [Multi-GPU](/user-guide/api-guides/core-types/multi-gpu): initialize multi-GPU resources and understand RAFT/NCCL communication setup.
-- [Resources](/user-guide/api-guides/core-types/resources): reuse CUDA streams, library handles, stream pools, and workspace resources across NVIDIA cuVS calls.
-- [Sparse Arrays](/user-guide/api-guides/core-types/array-types/sparse-arrays): use CSR and COO sparse matrix views with NVIDIA cuVS C++ APIs that accept sparse inputs.
-
-### Clustering Guide
-
-- [K-Means](/user-guide/api-guides/clustering-guide/k-means): partition vectors into a fixed number of clusters, often as part of scalable vector-search systems.
-- [Single-linkage](/user-guide/api-guides/clustering-guide/single-linkage): build hierarchical clusters from nearest-neighbor relationships.
-- [Spectral Clustering](/user-guide/api-guides/clustering-guide/spectral-clustering): use graph structure and spectral methods to identify clusters with more complex shapes.
-
-### Indexing Guide
-
-- [Brute-force](/user-guide/api-guides/indexing-guide/brute-force): run exact nearest-neighbor search by comparing each query with every vector.
-- [CAGRA](/user-guide/api-guides/indexing-guide/cagra): build and search GPU-optimized graph indexes for high-throughput ANN search.
-- [NN-Descent](/user-guide/api-guides/indexing-guide/nn-descent): build approximate nearest-neighbor graphs with an iterative algorithm.
-- [IVF-Flat](/user-guide/api-guides/indexing-guide/ivf-flat): partition vectors into inverted-file lists while storing full-precision vectors.
-- [IVF-PQ](/user-guide/api-guides/indexing-guide/ivf-pq): combine inverted-file partitioning with product quantization for compact indexes.
-- [ScaNN](/user-guide/api-guides/indexing-guide/sca-nn): combine partitioning, quantization, and refinement for high-quality approximate search.
-- [Vamana](/user-guide/api-guides/indexing-guide/vamana): build graph indexes for large-scale and disk-backed search workflows.
-- [All-neighbors](/user-guide/api-guides/indexing-guide/all-neighbors): compute all-neighbors graph structures.
-
-### Preprocessing Guide
-
-- [Binary Quantizer](/user-guide/api-guides/preprocessing-guide/binary-quantizer): compress vectors into binary representations for compact storage and fast comparisons.
-- [PCA](/user-guide/api-guides/preprocessing-guide/pca): reduce dimensionality with a linear projection while preserving as much variance as possible.
-- [Product Quantization](/user-guide/api-guides/preprocessing-guide/product-quantization): split vectors into subvectors and encode each part with compact codebooks.
-- [Scalar Quantizer](/user-guide/api-guides/preprocessing-guide/scalar-quantizer): compress each vector dimension independently with scalar quantization.
-- [Spectral Embedding](/user-guide/api-guides/preprocessing-guide/spectral-embedding): create lower-dimensional embeddings from graph structure.
-
-### Other APIs
-
-- [Dynamic Batching](/user-guide/api-guides/other-ap-is/dynamic-batching): collect many concurrent small ANN searches into larger GPU search batches.
-- [K-selection](/user-guide/api-guides/other-ap-is/k-selection): select the top `k` values or nearest candidates from larger result sets.
-- [Pairwise Distances](/user-guide/api-guides/other-ap-is/pairwise-distances): compute distances between vectors for analysis, validation, or algorithm building blocks.
-
-## Benchmarking Guide
-
-- [Methodologies](/user-guide/benchmarking-guide/methodologies): compare vector indexes fairly with quality buckets, Pareto curves, and consistent reporting.
-- [cuVS Bench Tool](/user-guide/benchmarking-guide/cu-vs-bench-tool): start with the cuVS Bench guide for reproducible benchmark workflows.
-- [cuVS Bench Installation](/user-guide/benchmarking-guide/cu-vs-bench-tool/installation): install cuVS Bench with packages or containers, or build it from source.
-- [cuVS Bench Usage](/user-guide/benchmarking-guide/cu-vs-bench-tool/usage): configure algorithms, run benchmarks, and read build and search results.
-- [cuVS Bench Datasets](/user-guide/benchmarking-guide/cu-vs-bench-tool/datasets): prepare datasets, ground truth, binary files, and dataset descriptors.
-- [cuVS Bench Backends](/user-guide/benchmarking-guide/cu-vs-bench-tool/backends): understand and extend backend integrations for benchmark execution.
-
-## Field Guide
-
-- [Field Guide](advanced_topics.md): find compatibility, runtime compilation, and specialized extension topics.
-- [Integration Patterns](/user-guide/field-guide/integration-patterns): compare direct, offloaded, and service-oriented ways to integrate cuVS into products.
-- [Compatibility](user_guide/abi_stability.md): understand cuVS release compatibility, ABI windows, and stable binary boundaries.
-- [JIT Compilation](jit_compilation.md): understand when cuVS triggers just-in-time compilation and how runtime caches behave.
-- [UDF Usage](udf_usage.md): supply custom CUDA distance metrics for IVF-flat search (C++ only, experimental).
-
-## References
-
-- [References](references.md): cite the research papers behind cuVS vector search, preprocessing, clustering, and GPU primitives.
+- [API Guide](/user-guide/api-guides): find task-focused NVIDIA cuVS API examples for clustering, vector indexing, preprocessing, common types, and supporting routines.
+- [Benchmarking Guide](/user-guide/benchmarking-guide): learn how to compare vector indexes fairly and use cuVS Bench for reproducible benchmark workflows.
+- [Field Guide](/user-guide/field-guide): find practical material for compatibility, integration patterns, runtime compilation, and extension points.
+- [References](references.md): cite the research papers behind NVIDIA cuVS vector search, preprocessing, clustering, and GPU primitives.
diff --git a/fern/scripts/generate_api_reference.py b/fern/scripts/generate_api_reference.py
index 500136e7cb..7541b11c8b 100755
--- a/fern/scripts/generate_api_reference.py
+++ b/fern/scripts/generate_api_reference.py
@@ -81,13 +81,43 @@
     "Utilities",
     "Other",
 ]
+API_REFERENCE_DIRS = [
+    "c_api",
+    "cpp_api",
+    "python_api",
+    "java_api",
+    "rust_api",
+    "go_api",
+]
 
 COMMENT_RE = re.compile(r"/\*\*.*?\*/|(?:///[^\n]*(?:\n|$))+", re.DOTALL)
 DOXYGEN_COMMAND_RE = re.compile(r"[@\\](\w+)\b")
 DOXYGEN_LIST_ITEM_RE = re.compile(r"^(?:-\s+|\d+\.\s+)")
+DESCRIPTION_BREAK_PREFIX_RE = re.compile(
+    r"^(?i:(?:possible values?|valid values?(?:\s+for\s+[^:]+)?|"
+    r"allowed values?|supported values?|options?|key fields|hint|note|nb|"
+    r"todo|warning|defaults?(?:\s+to)?)\b:?|use\s+`[^`]+`:?)"
+)
+DESCRIPTION_INLINE_BREAK_RE = re.compile(
+    r"\s+(?=(?i:(?:possible values?|valid values?(?:\s+for\s+[^:]+)?|"
+    r"allowed values?|supported values?)\b:?|"
+    r"(?:options?|key fields|hint|note|nb|todo|warning)\b:)|"
+    r"Defaults?\s+to\b|Use\s+`[^`]+`)"
+)
 DOXYGEN_FIELD_LIST_ITEM_RE = re.compile(
     r"^(?:-\s+)?`?(?P<name>[A-Za-z_]\w*)`?\s*:\s*(?P<description>.*)"
 )
+SQUASHED_MARKDOWN_LIST_PATTERNS = [
+    (
+        "inline markdown bullet list",
+        re.compile(r"(?<!<br />)\s+-\s+`[^`]+`:"),
+    ),
+    (
+        "inline markdown numbered list",
+        re.compile(r"(?:^|[\s:])1\.\s+[^|\n]+?\s+2\.\s+"),
+    ),
+]
+API_DECORATOR_LEAK_RE = re.compile(r"\bCUVS_EXPORT\b")
 PUBLIC_JAVA_TYPE_RE = re.compile(
     r"\bpublic\s+(?:abstract\s+|final\s+|sealed\s+|non-sealed\s+)?"
     r"(?P<kind>class|interface|enum|record)\s+(?P<name>[A-Za-z_]\w*)"
@@ -99,6 +129,7 @@
 CPP_COMPOUND_RE = re.compile(
     r"^\s*(?:typedef\s+)?(?:struct|class|enum(?:\s+class)?)\b"
 )
+API_DECORATOR_RE = re.compile(r"\bCUVS_EXPORT\b\s*")
 
 
 @dataclass
@@ -401,6 +432,7 @@ def main() -> int:
     generate_rust_api_pages()
     generate_go_api_pages()
     update_api_navigation()
+    validate_generated_api_markdown()
     return 0
 
 
@@ -2428,7 +2460,8 @@ def render_native_function(
     else:
         lines = [f"**Additional overload:** `{escape_code(entry.name)}`", ""]
     if entry.summary:
-        lines.extend([escape_text(entry.summary), ""])
+        lines.extend(render_doxygen_summary(entry.summary))
+        lines.append("")
     lines.extend([f"```{language}", signature, "```", ""])
 
     if entry.details:
@@ -2504,24 +2537,46 @@ def render_native_compound(
         f"### {heading_text(entry.name)}",
         "",
     ]
-    if entry.summary:
-        lines.extend([escape_text(entry.summary), ""])
 
     members: list[DoxygenEntry] = []
     values: list[dict[str, str]] = []
     field_descriptions: dict[str, str] = {}
+    summary = entry.summary
     details = entry.details
     if entry.kind == "enum":
         values = parse_enum_values(entry.signature)
-        field_descriptions, details = extract_field_descriptions(
+        summary_field_descriptions, summary_remaining = (
+            extract_field_descriptions(
+                summary.splitlines(), {value["name"] for value in values}
+            )
+        )
+        detail_field_descriptions, details = extract_field_descriptions(
             entry.details, {value["name"] for value in values}
         )
+        field_descriptions = {
+            **summary_field_descriptions,
+            **detail_field_descriptions,
+        }
+        summary = "\n".join(trim_blank_lines(summary_remaining)).strip()
     elif not is_class_signature(entry.signature):
         members = parse_struct_members(entry)
-        field_descriptions, details = extract_field_descriptions(
+        summary_field_descriptions, summary_remaining = (
+            extract_field_descriptions(
+                summary.splitlines(), {member.name for member in members}
+            )
+        )
+        detail_field_descriptions, details = extract_field_descriptions(
             entry.details, {member.name for member in members}
         )
+        field_descriptions = {
+            **summary_field_descriptions,
+            **detail_field_descriptions,
+        }
+        summary = "\n".join(trim_blank_lines(summary_remaining)).strip()
 
+    if summary:
+        lines.extend(render_doxygen_summary(summary))
+        lines.append("")
     if details:
         lines.extend(render_doxygen_details(details))
         lines.append("")
@@ -2593,7 +2648,8 @@ def render_native_member(entry: DoxygenEntry, language: str) -> list[str]:
         "",
     ]
     if entry.summary:
-        lines.extend([escape_text(entry.summary), ""])
+        lines.extend(render_doxygen_summary(entry.summary))
+        lines.append("")
     lines.extend(
         [f"```{language}", normalize_signature(entry.signature), "```", ""]
     )
@@ -3285,11 +3341,15 @@ def parse_doxygen_entry(
             continue
 
         if active_returns:
-            returns = append_sentence(returns, clean_doxygen_text(line_text))
+            returns = append_doxygen_line(
+                returns, clean_doxygen_text(line_text)
+            )
             continue
 
         if active_summary:
-            summary = append_sentence(summary, clean_doxygen_text(line_text))
+            summary = append_doxygen_line(
+                summary, clean_doxygen_text(line_text)
+            )
             continue
 
         details.append(clean_doxygen_text(raw_line.rstrip()))
@@ -3410,7 +3470,9 @@ def append_doxygen_line(existing: str, addition: str) -> str:
     if not existing:
         return addition
     lines = existing.splitlines()
-    if DOXYGEN_LIST_ITEM_RE.match(addition):
+    if DOXYGEN_LIST_ITEM_RE.match(addition) or is_description_break_line(
+        addition
+    ):
         lines.append(addition)
     else:
         lines[-1] = append_sentence(lines[-1], addition)
@@ -3423,6 +3485,18 @@ def append_doxygen_blank_line(existing: str) -> str:
     return f"{existing}\n"
 
 
+def is_description_break_line(line: str) -> bool:
+    return bool(DESCRIPTION_BREAK_PREFIX_RE.match(line.strip()))
+
+
+def split_description_breaks(line: str) -> list[str]:
+    return [
+        part.strip()
+        for part in DESCRIPTION_INLINE_BREAK_RE.split(line.strip())
+        if part.strip()
+    ]
+
+
 def parse_doxygen_kind(declaration: str) -> str:
     untemplated = strip_leading_cpp_templates(declaration)
     if re.search(r"^\s*(?:typedef\s+)?(?:struct|class)\b", untemplated) and (
@@ -3492,7 +3566,7 @@ def parse_member_name(declaration: str) -> str:
 def parse_struct_name(declaration: str) -> str | None:
     declaration = strip_leading_cpp_templates(declaration)
     match = re.search(
-        r"^\s*(?:typedef\s+)?(?:struct|class)\s+([A-Za-z_]\w*)",
+        r"^\s*(?:typedef\s+)?(?:struct|class)\s+(?:CUVS_EXPORT\s+)?([A-Za-z_]\w*)",
         declaration,
     )
     return match.group(1) if match else None
@@ -3565,7 +3639,7 @@ def infer_cpp_context(prefix: str) -> tuple[str, list[str], str | None]:
     depth = 0
     token_re = re.compile(
         r"\bnamespace\s+([A-Za-z_][\w:]*)(?:\s*=\s*[^;{}]+)?\s*{"
-        r"|\b(class|struct)\s+([A-Za-z_]\w*)[^;{}]*{"
+        r"|\b(class|struct)\s+(?:CUVS_EXPORT\s+)?([A-Za-z_]\w*)[^;{}]*{"
         r"|\b(public|private|protected)\s*:"
         r"|[{}]"
     )
@@ -3629,6 +3703,7 @@ def normalize_entry_signature(declaration: str, kind: str) -> str:
 
 
 def normalize_signature(declaration: str) -> str:
+    declaration = API_DECORATOR_RE.sub("", declaration)
     declaration = re.sub(r"\n\s+", "\n", declaration.strip())
     return "\n".join(
         line.rstrip() for line in declaration.splitlines()
@@ -3972,7 +4047,9 @@ def extract_field_descriptions(
                     break
                 description.append(next_stripped)
                 idx += 1
-            descriptions[name] = " ".join(part for part in description if part)
+            descriptions[name] = "\n".join(
+                part for part in description if part
+            )
             continue
         remaining.append(line)
         idx += 1
@@ -4320,7 +4397,10 @@ def render_table_description(value: str) -> str:
     rendered: list[str] = []
     for paragraph in paragraphs:
         normalized = normalize_description_lines(paragraph)
-        if any(DOXYGEN_LIST_ITEM_RE.match(line) for line in normalized):
+        if any(
+            DOXYGEN_LIST_ITEM_RE.match(line) or is_description_break_line(line)
+            for line in normalized
+        ):
             rendered.append(
                 "<br />".join(escape_text(line) for line in normalized)
             )
@@ -4333,25 +4413,41 @@ def normalize_description_lines(raw_lines: list[str]) -> list[str]:
     lines: list[str] = []
     paragraph: list[str] = []
     in_list = False
+    in_semantic_break = False
+
+    def flush_paragraph() -> None:
+        nonlocal paragraph
+        if paragraph:
+            lines.append(" ".join(paragraph))
+            paragraph = []
 
     for raw_line in raw_lines:
-        line = raw_line.strip()
-        if DOXYGEN_LIST_ITEM_RE.match(line):
-            if paragraph:
-                lines.append(" ".join(paragraph))
-                paragraph = []
-            lines.append(line)
-            in_list = True
-            continue
+        for line in split_description_breaks(raw_line):
+            if DOXYGEN_LIST_ITEM_RE.match(line):
+                flush_paragraph()
+                lines.append(line)
+                in_list = True
+                in_semantic_break = False
+                continue
 
-        if in_list and lines:
-            lines[-1] = append_sentence(lines[-1], line)
-            continue
+            if is_description_break_line(line):
+                flush_paragraph()
+                lines.append(line)
+                in_list = False
+                in_semantic_break = True
+                continue
+
+            if in_list and lines:
+                lines[-1] = append_sentence(lines[-1], line)
+                continue
+
+            if in_semantic_break and lines:
+                lines[-1] = append_sentence(lines[-1], line)
+                continue
 
-        paragraph.append(line)
+            paragraph.append(line)
 
-    if paragraph:
-        lines.append(" ".join(paragraph))
+    flush_paragraph()
     return lines
 
 
@@ -4398,6 +4494,13 @@ def flush_paragraph() -> None:
     return trim_blank_lines(lines)
 
 
+def render_doxygen_summary(summary: str) -> list[str]:
+    raw_lines = summary.splitlines()
+    if len(raw_lines) > 1:
+        return render_doxygen_details(raw_lines)
+    return [escape_text(summary)]
+
+
 def render_doc_lines(raw_lines: list[str]) -> list[str]:
     lines = []
     in_code = False
@@ -4574,9 +4677,11 @@ def parse_javadoc(raw: str) -> JavaDoc:
             active_kind = ""
             continue
         if active is not None and active_kind in {"param", "throws"}:
-            active.description = append_sentence(active.description, stripped)
+            active.description = append_doxygen_line(
+                active.description, stripped
+            )
         elif active_kind == "return":
-            doc.returns = append_sentence(doc.returns, stripped)
+            doc.returns = append_doxygen_line(doc.returns, stripped)
         else:
             summary_lines.append(stripped)
     doc.summary = "\n".join(trim_blank_lines(summary_lines)).strip()
@@ -4607,7 +4712,7 @@ def render_javadoc(doc: JavaDoc) -> list[str]:
         )
         for param in doc.params:
             lines.append(
-                f"| `{escape_code(param.name)}` | {escape_text(param.description)} |"
+                f"| `{escape_code(param.name)}` | {render_table_description(param.description)} |"
             )
         lines.append("")
     if doc.returns:
@@ -4618,7 +4723,7 @@ def render_javadoc(doc: JavaDoc) -> list[str]:
         )
         for param in doc.throws:
             lines.append(
-                f"| `{escape_code(param.name)}` | {escape_text(param.description)} |"
+                f"| `{escape_code(param.name)}` | {render_table_description(param.description)} |"
             )
         lines.append("")
     return trim_blank_lines(lines)
@@ -5709,8 +5814,11 @@ def render_param_table(
         row.append(render_type_reference(param.get("type", ""), symbol_links))
         description = param.get("description", "")
         if param.get("default"):
+            default_text = f"Default: `{param['default']}`."
             description = (
-                f"{description} Default: `{param['default']}`.".strip()
+                f"{description}\n{default_text}"
+                if description
+                else default_text
             )
         row.append(render_table_description(description))
         lines.append("| " + " | ".join(row) + " |")
@@ -5873,5 +5981,66 @@ def write_page(path: Path, lines: list[str]) -> None:
     print(f"Wrote {path.relative_to(REPO_DIR)}")
 
 
+def validate_generated_api_markdown() -> None:
+    failures: list[str] = []
+    for api_dir in API_REFERENCE_DIRS:
+        page_dir = FERN_PAGES / api_dir
+        if not page_dir.exists():
+            continue
+        for path in sorted(page_dir.glob("*.md")):
+            failures.extend(find_squashed_markdown_lists(path))
+            failures.extend(find_api_decorator_leaks(path))
+
+    if failures:
+        examples = "\n".join(failures[:50])
+        suffix = ""
+        if len(failures) > 50:
+            suffix = f"\n... and {len(failures) - 50} more"
+        raise RuntimeError(
+            "Generated API docs failed validation (flattened list markers or "
+            "leaked API decorators). Fix the source or update the parser.\n"
+            f"{examples}{suffix}"
+        )
+
+
+def find_squashed_markdown_lists(path: Path) -> list[str]:
+    failures: list[str] = []
+    in_code_block = False
+    for line_no, line in enumerate(
+        path.read_text(encoding="utf-8").splitlines(), 1
+    ):
+        if line.lstrip().startswith("```"):
+            in_code_block = not in_code_block
+            continue
+        if in_code_block:
+            continue
+
+        for label, pattern in SQUASHED_MARKDOWN_LIST_PATTERNS:
+            if pattern.search(line):
+                preview = line.strip()
+                if len(preview) > 220:
+                    preview = f"{preview[:217]}..."
+                failures.append(
+                    f"{path.relative_to(REPO_DIR)}:{line_no}: {label}: {preview}"
+                )
+    return failures
+
+
+def find_api_decorator_leaks(path: Path) -> list[str]:
+    failures: list[str] = []
+    for line_no, line in enumerate(
+        path.read_text(encoding="utf-8").splitlines(), 1
+    ):
+        if not API_DECORATOR_LEAK_RE.search(line):
+            continue
+        preview = line.strip()
+        if len(preview) > 220:
+            preview = f"{preview[:217]}..."
+        failures.append(
+            f"{path.relative_to(REPO_DIR)}:{line_no}: API decorator leaked into docs: {preview}"
+        )
+    return failures
+
+
 if __name__ == "__main__":
     raise SystemExit(main())

From 55a043809bd947d278d4c1c20e18b61962738f24 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Thu, 4 Jun 2026 14:43:03 -0400
Subject: [PATCH 06/10] Set NVIDIA global Fern theme

---
 fern/docs.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fern/docs.yml b/fern/docs.yml
index adc7034efd..5e6eab3d42 100644
--- a/fern/docs.yml
+++ b/fern/docs.yml
@@ -1,5 +1,6 @@
 # yaml-language-server: $schema=https://schema.buildwithfern.dev/docs-yml.json
 
+global-theme: nvidia
 title: "cuVS"
 instances:
   - url: "nvidia-cuvs.docs.buildwithfern.com/cuvs"

From fc579bcf6c4a46032bcb8274a628954d0877ae63 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Tue, 9 Jun 2026 16:35:28 -0400
Subject: [PATCH 07/10] Updating README to point to new docs

---
 README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 1443ed90bc..b4025bc835 100755
--- a/README.md
+++ b/README.md
@@ -12,11 +12,11 @@
 
 ## Useful Resources
 
-- [Documentation](https://docs.rapids.ai/api/cuvs/nightly/): Library documentation.
-- [Build and Install Guide](https://docs.rapids.ai/api/cuvs/nightly/build): Instructions for installing and building cuVS.
-- [Getting Started Guide](https://docs.rapids.ai/api/cuvs/nightly/getting_started): Guide to getting started with cuVS.
+- [Documentation](https://docs.nvidia.com/cuvs): Library documentation.
+- [Build and Install Guide](https://docs.nvidia.com/cuvs/installation): Instructions for installing and building cuVS.
+- [Getting Started Guide](https://docs.nvidia.com/cuvs/getting-started): Guide to getting started with cuVS.
 - [Code Examples](https://github.com/rapidsai/cuvs/tree/HEAD/examples): Self-contained Code Examples.
-- [API Reference Documentation](https://docs.rapids.ai/api/cuvs/nightly/api_docs): API Documentation.
+- [API Reference Documentation](https://docs.nvidia.com/cuvs/api_reference): API Documentation.
 - [RAPIDS Community](https://rapids.ai/community.html): Get help, contribute, and collaborate.
 - [GitHub repository](https://github.com/rapidsai/cuvs): Download the cuVS source code.
 - [Issue tracker](https://github.com/rapidsai/cuvs/issues): Report issues or request features.

From 8dc29fc106eea28c6e1414363df09e083bfef5f8 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Tue, 9 Jun 2026 16:44:57 -0400
Subject: [PATCH 08/10] Add API Reference landing page

---
 fern/docs.yml               |  1 +
 fern/pages/api_reference.md | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+)
 create mode 100644 fern/pages/api_reference.md

diff --git a/fern/docs.yml b/fern/docs.yml
index 5e6eab3d42..bceb3113c0 100644
--- a/fern/docs.yml
+++ b/fern/docs.yml
@@ -259,6 +259,7 @@ navigation:
       - page: "Contributing"
         path: "./pages/contributing.md"
   - section: "API Reference"
+    path: "./pages/api_reference.md"
     contents:
       - section: "C API Documentation"
         path: "./pages/c_api/index.md"
diff --git a/fern/pages/api_reference.md b/fern/pages/api_reference.md
new file mode 100644
index 0000000000..f52a20d2b0
--- /dev/null
+++ b/fern/pages/api_reference.md
@@ -0,0 +1,18 @@
+---
+slug: api-reference
+---
+
+# API Reference
+
+Use these generated references to inspect NVIDIA cuVS APIs by language. These pages are generated from the public source code and are best for checking signatures, parameters, return types, data types, and language-specific API surfaces.
+
+For task-focused examples and usage guidance, see the [API Guide](/user-guide/api-guides).
+
+## Language References
+
+- [C API Documentation](/api-reference/c-api-documentation): inspect the ABI-stable C API layer used by NVIDIA cuVS language bindings and downstream integrations.
+- [C++ API Documentation](/api-reference/cpp-api-documentation): inspect the core NVIDIA cuVS C++ APIs, including common RAFT types used in public headers.
+- [Python API Documentation](/api-reference/python-api-documentation): inspect Python modules, classes, functions, and parameters.
+- [Java API Documentation](/api-reference/java-api-documentation): inspect Java classes, resources, matrices, indexes, and query APIs.
+- [Rust API Documentation](/api-reference/rust-api-documentation): inspect Rust modules, structs, and wrappers around NVIDIA cuVS APIs.
+- [Go API Documentation](/api-reference/go-api-documentation): inspect Go packages for supported NVIDIA cuVS APIs.

From 24bc4263c3735d57bc98f84e9b475e1d7d32ef7b Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Thu, 25 Jun 2026 11:47:57 -0400
Subject: [PATCH 09/10] Update cuVS repository URLs

---
 CHANGELOG.md                                  | 1830 ++++++++---------
 Dockerfile                                    |    2 +-
 README.md                                     |   12 +-
 ci/build_java.sh                              |    2 +-
 ci/check_c_abi/pyproject.toml                 |    2 +-
 ci/test_java.sh                               |    2 +-
 cpp/bench/ann/src/diskann/diskann_wrapper.h   |    2 +-
 cpp/include/cuvs/cluster/kmeans.hpp           |    2 +-
 cpp/include/cuvs/cluster/spectral.hpp         |    2 +-
 cpp/include/cuvs/neighbors/common.hpp         |    2 +-
 cpp/include/cuvs/neighbors/hnsw.hpp           |    2 +-
 .../all_neighbors/all_neighbors_merge.cuh     |    2 +-
 .../neighbors/detail/cagra/cagra_build.cuh    |    2 +-
 cpp/src/neighbors/detail/nn_descent.cuh       |    4 +-
 .../neighbors/detail/vamana/vamana_build.cuh  |    2 +-
 cpp/src/neighbors/detail/vpq_dataset.cuh      |    2 +-
 cpp/src/neighbors/mg/snmg.cuh                 |    2 +-
 .../ann_cagra/bug_issue_93_reproducer.cu      |    2 +-
 .../ann_cagra/bug_iterative_cagra_build.cu    |    2 +-
 cpp/tests/neighbors/ann_ivf_flat.cuh          |    2 +-
 cpp/tests/neighbors/ann_ivf_pq.cuh            |    2 +-
 examples/go/README.md                         |    6 +-
 examples/go/go.mod                            |    2 +-
 examples/go/go.sum                            |    4 +-
 examples/go/main.go                           |    4 +-
 fern/docs.yml                                 |    2 +-
 fern/pages/contributing.md                    |    8 +-
 fern/pages/cpp_api/cpp-api-cluster-kmeans.md  |    2 +-
 .../pages/cpp_api/cpp-api-neighbors-common.md |    2 +-
 fern/pages/cpp_guidelines.md                  |    2 +-
 fern/pages/cuvs_bench/datasets.md             |    2 +-
 fern/pages/index.md                           |    8 +-
 fern/pages/installation/go.md                 |    2 +-
 fern/pages/integrations.md                    |    2 +-
 fern/pages/neighbors/bruteforce.md            |    4 +-
 fern/pages/neighbors/cagra.md                 |    8 +-
 fern/pages/neighbors/ivfflat.md               |    4 +-
 fern/pages/neighbors/ivfpq.md                 |    4 +-
 fern/pages/other/memory_management.md         |    4 +-
 fern/pages/other/multidimensional_arrays.md   |   10 +-
 fern/pages/other/resources.md                 |    6 +-
 go/brute_force/brute_force.go                 |    2 +-
 go/brute_force/brute_force_test.go            |    2 +-
 go/cagra/cagra.go                             |    2 +-
 go/cagra/cagra_test.go                        |    2 +-
 go/cagra/extend_params.go                     |    2 +-
 go/cagra/index_params.go                      |    2 +-
 go/cagra/search_params.go                     |    2 +-
 go/distance_test.go                           |    2 +-
 go/dlpack_test.go                             |    2 +-
 go/go.mod                                     |    2 +-
 go/ivf_flat/index_params.go                   |    2 +-
 go/ivf_flat/ivf_flat.go                       |    2 +-
 go/ivf_flat/ivf_flat_test.go                  |    2 +-
 go/ivf_flat/search_params.go                  |    2 +-
 go/ivf_pq/index_params.go                     |    2 +-
 go/ivf_pq/ivf_pq.go                           |    2 +-
 go/ivf_pq/ivf_pq_test.go                      |    2 +-
 go/ivf_pq/search_params.go                    |    2 +-
 go/memory_resource_test.go                    |    2 +-
 java/cuvs-java/pom.xml                        |    4 +-
 .../com/nvidia/cuvs/CagraRandomizedIT.java    |    2 +-
 .../VectorSearch_QuestionRetrieval.ipynb      |    2 +-
 python/cuvs/cuvs/tests/test_doctests.py       |    2 +-
 python/cuvs/cuvs/tests/test_vamana.py         |    2 +-
 python/cuvs/pyproject.toml                    |    2 +-
 python/cuvs_bench/cuvs_bench/plot/__main__.py |    2 +-
 python/cuvs_bench/pyproject.toml              |    2 +-
 python/libcuvs/pyproject.toml                 |    2 +-
 rust/Cargo.toml                               |    4 +-
 70 files changed, 1015 insertions(+), 1015 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 300cffa0a1..efee6cb9b0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,1107 +1,1107 @@
 # cuvs 26.06.00 (3 Jun 2026)
 
 ### 🚨 Breaking Changes
-* Default to static linking of libcudart by @bdice in https://github.com/rapidsai/cuvs/pull/1627
-* Remove JIT+LTO fragment database by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/1927
-* Use static cudart by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/1931
-* Always build with JIT+LTO by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/1923
-* Migrate RMM usage to CCCL MR design by @bdice in https://github.com/rapidsai/cuvs/pull/1990
-* Exposition of KMeans param object for PQ in C++ by @lowener in https://github.com/rapidsai/cuvs/pull/2005
-* [Cleanup] Combine Batched and Regular KMeans Impl by @tarang-jain in https://github.com/rapidsai/cuvs/pull/2015
-* Preserve input memory location for NN Descent by @jinsolp in https://github.com/rapidsai/cuvs/pull/1928
+* Default to static linking of libcudart by @bdice in https://github.com/nvidia/cuvs/pull/1627
+* Remove JIT+LTO fragment database by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/1927
+* Use static cudart by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/1931
+* Always build with JIT+LTO by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/1923
+* Migrate RMM usage to CCCL MR design by @bdice in https://github.com/nvidia/cuvs/pull/1990
+* Exposition of KMeans param object for PQ in C++ by @lowener in https://github.com/nvidia/cuvs/pull/2005
+* [Cleanup] Combine Batched and Regular KMeans Impl by @tarang-jain in https://github.com/nvidia/cuvs/pull/2015
+* Preserve input memory location for NN Descent by @jinsolp in https://github.com/nvidia/cuvs/pull/1928
 ### 🐛 Bug Fixes
-* Fix CCCL compilation error by @viclafargue in https://github.com/rapidsai/cuvs/pull/1963
-* Forward-merge release/26.04 into main by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/1971
-* Forward-merge release/26.04 into main by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/1980
-* Remove dangling pointers in JIT Fragments by @divyegala in https://github.com/rapidsai/cuvs/pull/1988
-* Add `head_rev` to cuvs recipe by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/1993
-* Fix potential OOB access in CAGRA search when graph size < dataset size by @irina-resh-nvda in https://github.com/rapidsai/cuvs/pull/1780
-* Fix MG kmeans intertia_check n_iters by @aamijar in https://github.com/rapidsai/cuvs/pull/2020
-* Fix cuvs_bench pytest pareto assert by @aamijar in https://github.com/rapidsai/cuvs/pull/2027
-* Fix nightly build matrix by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/2054
-* Fix vulnerable index deserialization by @lowener in https://github.com/rapidsai/cuvs/pull/2068
-* Fix symbol export kmeans by @aamijar in https://github.com/rapidsai/cuvs/pull/2070
-* Fix argmin/argmax based on the distance type by @achirkin in https://github.com/rapidsai/cuvs/pull/2016
-* Remove unneeded request for CUDA device link phase by @robertmaynard in https://github.com/rapidsai/cuvs/pull/2077
-* Update Faiss and DiskANN Patch to Use C++20 by @tarang-jain in https://github.com/rapidsai/cuvs/pull/1796
-* Fix brute force Rust index dataset lifetime by @yan-zaretskiy in https://github.com/rapidsai/cuvs/pull/2083
-* Fix segfault cuvs bench by @aamijar in https://github.com/rapidsai/cuvs/pull/2088
-* Fix cagra::optimize modifying the state of raft::resources by @achirkin in https://github.com/rapidsai/cuvs/pull/2103
-* Add direct target dependency when embedding fatbins by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/2106
-* Fix check for PQ vectorized load by @lowener in https://github.com/rapidsai/cuvs/pull/2107
-* Fix workspace usage by @mfoerste4 in https://github.com/rapidsai/cuvs/pull/2135
-* Add missing visibility controls in IVF SQ by @divyegala in https://github.com/rapidsai/cuvs/pull/2141
+* Fix CCCL compilation error by @viclafargue in https://github.com/nvidia/cuvs/pull/1963
+* Forward-merge release/26.04 into main by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/1971
+* Forward-merge release/26.04 into main by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/1980
+* Remove dangling pointers in JIT Fragments by @divyegala in https://github.com/nvidia/cuvs/pull/1988
+* Add `head_rev` to cuvs recipe by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/1993
+* Fix potential OOB access in CAGRA search when graph size < dataset size by @irina-resh-nvda in https://github.com/nvidia/cuvs/pull/1780
+* Fix MG kmeans intertia_check n_iters by @aamijar in https://github.com/nvidia/cuvs/pull/2020
+* Fix cuvs_bench pytest pareto assert by @aamijar in https://github.com/nvidia/cuvs/pull/2027
+* Fix nightly build matrix by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/2054
+* Fix vulnerable index deserialization by @lowener in https://github.com/nvidia/cuvs/pull/2068
+* Fix symbol export kmeans by @aamijar in https://github.com/nvidia/cuvs/pull/2070
+* Fix argmin/argmax based on the distance type by @achirkin in https://github.com/nvidia/cuvs/pull/2016
+* Remove unneeded request for CUDA device link phase by @robertmaynard in https://github.com/nvidia/cuvs/pull/2077
+* Update Faiss and DiskANN Patch to Use C++20 by @tarang-jain in https://github.com/nvidia/cuvs/pull/1796
+* Fix brute force Rust index dataset lifetime by @yan-zaretskiy in https://github.com/nvidia/cuvs/pull/2083
+* Fix segfault cuvs bench by @aamijar in https://github.com/nvidia/cuvs/pull/2088
+* Fix cagra::optimize modifying the state of raft::resources by @achirkin in https://github.com/nvidia/cuvs/pull/2103
+* Add direct target dependency when embedding fatbins by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/2106
+* Fix check for PQ vectorized load by @lowener in https://github.com/nvidia/cuvs/pull/2107
+* Fix workspace usage by @mfoerste4 in https://github.com/nvidia/cuvs/pull/2135
+* Add missing visibility controls in IVF SQ by @divyegala in https://github.com/nvidia/cuvs/pull/2141
 ### 📖 Documentation
-* Elaborate on fragment architecture in JIT+LTO documentation by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/1991
-* Add Cluster and Distance sections to C documentation by @lowener in https://github.com/rapidsai/cuvs/pull/1955
-* Adding CAGRA merge to the documentation by @viclafargue in https://github.com/rapidsai/cuvs/pull/1942
-* [Doc Update] CAGRA Memory Footprint by @singhmanas1 in https://github.com/rapidsai/cuvs/pull/1300
-* Align docs with pluggable benchmark API by @jnke2016 in https://github.com/rapidsai/cuvs/pull/1891
-* Add docs for cagra mem usage with NN Descent build algo by @jinsolp in https://github.com/rapidsai/cuvs/pull/2000
-* Fix minor typos in ``cuvs-bench`` source build docs by @jrbourbeau in https://github.com/rapidsai/cuvs/pull/2006
-* Fix `cuvs-bench` docker images in docs by @jrbourbeau in https://github.com/rapidsai/cuvs/pull/2003
-* Update JIT+LTO guide to reflect new automatic embedding system by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/2045
-* [Docs] Convert Sphinx docs to Fern by @cjnolet in https://github.com/rapidsai/cuvs/pull/2067
-* Add UDF Usage and Developer docs by @divyegala in https://github.com/rapidsai/cuvs/pull/2030
-* [DOC] Adding API guides for core cuVS types by @cjnolet in https://github.com/rapidsai/cuvs/pull/2117
+* Elaborate on fragment architecture in JIT+LTO documentation by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/1991
+* Add Cluster and Distance sections to C documentation by @lowener in https://github.com/nvidia/cuvs/pull/1955
+* Adding CAGRA merge to the documentation by @viclafargue in https://github.com/nvidia/cuvs/pull/1942
+* [Doc Update] CAGRA Memory Footprint by @singhmanas1 in https://github.com/nvidia/cuvs/pull/1300
+* Align docs with pluggable benchmark API by @jnke2016 in https://github.com/nvidia/cuvs/pull/1891
+* Add docs for cagra mem usage with NN Descent build algo by @jinsolp in https://github.com/nvidia/cuvs/pull/2000
+* Fix minor typos in ``cuvs-bench`` source build docs by @jrbourbeau in https://github.com/nvidia/cuvs/pull/2006
+* Fix `cuvs-bench` docker images in docs by @jrbourbeau in https://github.com/nvidia/cuvs/pull/2003
+* Update JIT+LTO guide to reflect new automatic embedding system by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/2045
+* [Docs] Convert Sphinx docs to Fern by @cjnolet in https://github.com/nvidia/cuvs/pull/2067
+* Add UDF Usage and Developer docs by @divyegala in https://github.com/nvidia/cuvs/pull/2030
+* [DOC] Adding API guides for core cuVS types by @cjnolet in https://github.com/nvidia/cuvs/pull/2117
 ### 🚀 New Features
-* [REVIEW] Add L1 support to NN-Descent by @yan-zaretskiy in https://github.com/rapidsai/cuvs/pull/1898
-* PCA C and Python API by @aamijar in https://github.com/rapidsai/cuvs/pull/1987
-* Introduce UDF Architecture by @divyegala in https://github.com/rapidsai/cuvs/pull/1804
-* JIT LTO Cagra Search by @divyegala in https://github.com/rapidsai/cuvs/pull/1807
-* Expose supported brute force metrics in `all_neighbors` by @jinsolp in https://github.com/rapidsai/cuvs/pull/1827
-* [REVIEW] Generalize and improve cagra::optimize by @mfoerste4 in https://github.com/rapidsai/cuvs/pull/1830
-* IVF-SQ C++ API by @viclafargue in https://github.com/rapidsai/cuvs/pull/1865
+* [REVIEW] Add L1 support to NN-Descent by @yan-zaretskiy in https://github.com/nvidia/cuvs/pull/1898
+* PCA C and Python API by @aamijar in https://github.com/nvidia/cuvs/pull/1987
+* Introduce UDF Architecture by @divyegala in https://github.com/nvidia/cuvs/pull/1804
+* JIT LTO Cagra Search by @divyegala in https://github.com/nvidia/cuvs/pull/1807
+* Expose supported brute force metrics in `all_neighbors` by @jinsolp in https://github.com/nvidia/cuvs/pull/1827
+* [REVIEW] Generalize and improve cagra::optimize by @mfoerste4 in https://github.com/nvidia/cuvs/pull/1830
+* IVF-SQ C++ API by @viclafargue in https://github.com/nvidia/cuvs/pull/1865
 ### 🛠️ Improvements
-* Use PQ API in CAGRA-Q + SCANN by @lowener in https://github.com/rapidsai/cuvs/pull/1746
-* Speed up recall calculation in cuVS Bench for large top-K by @jamxia155 in https://github.com/rapidsai/cuvs/pull/1816
-* Update codespell Version in pre-commit-config by @tarang-jain in https://github.com/rapidsai/cuvs/pull/1920
-* Forward-merge release/26.04 into main by @gforsyth in https://github.com/rapidsai/cuvs/pull/1936
-* Refactor `StaticFatbinFragmentEntry` to use tags by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/1970
-* Replace cudaMemcpy2DAsync Calls with raft::copy_matrix by @tarang-jain in https://github.com/rapidsai/cuvs/pull/1976
-* update pip devcontainers' base image tags by @trxcllnt in https://github.com/rapidsai/cuvs/pull/1985
-* Refactor instantiation matrices to generate at build time by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/1984
-* Add option to enable "sve" optimization level on armv9 by @LizYou in https://github.com/rapidsai/cuvs/pull/1121
-* Improve cuvs-bench doc and add executable dir option by @tfeher in https://github.com/rapidsai/cuvs/pull/681
-* Enforce type safety in JIT+LTO launcher by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/1997
-* Add KDE kernel by @Intron7 in https://github.com/rapidsai/cuvs/pull/1915
-* Coderabbit integration by @benfred in https://github.com/rapidsai/cuvs/pull/1908
-* Refactor fatbin registration to use common input file by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/2008
-* Update to clang 20.1.8 by @bdice in https://github.com/rapidsai/cuvs/pull/2009
-* JIT+LTO IVF-PQ compute similarity by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/1957
-* Refactor JIT+LTO kernels by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/2021
-* feat(rust): add serialize/deserialize support for CAGRA index by @zbennett10 in https://github.com/rapidsai/cuvs/pull/1840
-* Use new compute-matrix workflow by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/2034
-* Reuse minClusterAndDistance Helper for Balanced KMeans by @tarang-jain in https://github.com/rapidsai/cuvs/pull/2001
-* feat(rust): add search_with_filter to CAGRA Index by @jamie8johnson in https://github.com/rapidsai/cuvs/pull/2019
-* [REVIEW] Drop extra copy in `get_last_error_text` by @jakirkham in https://github.com/rapidsai/cuvs/pull/2044
-* FIX: disable warpspeed scan by @mfoerste4 in https://github.com/rapidsai/cuvs/pull/2062
-* Use `token.rapids.nvidia.com` when issuing S3 bucket creds in devcontainers by @trxcllnt in https://github.com/rapidsai/cuvs/pull/2047
-* Remove `NO_CUDART_DEP` property by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/2065
-* Switch the remaining C++17 components to C++20 by @achirkin in https://github.com/rapidsai/cuvs/pull/2063
-* fix(ci): resolve all zizmor findings and add zizmor pre-commit checks by @gforsyth in https://github.com/rapidsai/cuvs/pull/2053
-* fix(ci): declare explicit secrets in `publish-rust.yaml` by @gforsyth in https://github.com/rapidsai/cuvs/pull/2069
-* [REVIEW] Rewrite cuvs-sys build to discover pre-installed cuVS via cmake-package by @yan-zaretskiy in https://github.com/rapidsai/cuvs/pull/2022
-* Fix symbol export by @vyasr in https://github.com/rapidsai/cuvs/pull/2052
+* Use PQ API in CAGRA-Q + SCANN by @lowener in https://github.com/nvidia/cuvs/pull/1746
+* Speed up recall calculation in cuVS Bench for large top-K by @jamxia155 in https://github.com/nvidia/cuvs/pull/1816
+* Update codespell Version in pre-commit-config by @tarang-jain in https://github.com/nvidia/cuvs/pull/1920
+* Forward-merge release/26.04 into main by @gforsyth in https://github.com/nvidia/cuvs/pull/1936
+* Refactor `StaticFatbinFragmentEntry` to use tags by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/1970
+* Replace cudaMemcpy2DAsync Calls with raft::copy_matrix by @tarang-jain in https://github.com/nvidia/cuvs/pull/1976
+* update pip devcontainers' base image tags by @trxcllnt in https://github.com/nvidia/cuvs/pull/1985
+* Refactor instantiation matrices to generate at build time by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/1984
+* Add option to enable "sve" optimization level on armv9 by @LizYou in https://github.com/nvidia/cuvs/pull/1121
+* Improve cuvs-bench doc and add executable dir option by @tfeher in https://github.com/nvidia/cuvs/pull/681
+* Enforce type safety in JIT+LTO launcher by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/1997
+* Add KDE kernel by @Intron7 in https://github.com/nvidia/cuvs/pull/1915
+* Coderabbit integration by @benfred in https://github.com/nvidia/cuvs/pull/1908
+* Refactor fatbin registration to use common input file by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/2008
+* Update to clang 20.1.8 by @bdice in https://github.com/nvidia/cuvs/pull/2009
+* JIT+LTO IVF-PQ compute similarity by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/1957
+* Refactor JIT+LTO kernels by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/2021
+* feat(rust): add serialize/deserialize support for CAGRA index by @zbennett10 in https://github.com/nvidia/cuvs/pull/1840
+* Use new compute-matrix workflow by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/2034
+* Reuse minClusterAndDistance Helper for Balanced KMeans by @tarang-jain in https://github.com/nvidia/cuvs/pull/2001
+* feat(rust): add search_with_filter to CAGRA Index by @jamie8johnson in https://github.com/nvidia/cuvs/pull/2019
+* [REVIEW] Drop extra copy in `get_last_error_text` by @jakirkham in https://github.com/nvidia/cuvs/pull/2044
+* FIX: disable warpspeed scan by @mfoerste4 in https://github.com/nvidia/cuvs/pull/2062
+* Use `token.rapids.nvidia.com` when issuing S3 bucket creds in devcontainers by @trxcllnt in https://github.com/nvidia/cuvs/pull/2047
+* Remove `NO_CUDART_DEP` property by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/2065
+* Switch the remaining C++17 components to C++20 by @achirkin in https://github.com/nvidia/cuvs/pull/2063
+* fix(ci): resolve all zizmor findings and add zizmor pre-commit checks by @gforsyth in https://github.com/nvidia/cuvs/pull/2053
+* fix(ci): declare explicit secrets in `publish-rust.yaml` by @gforsyth in https://github.com/nvidia/cuvs/pull/2069
+* [REVIEW] Rewrite cuvs-sys build to discover pre-installed cuVS via cmake-package by @yan-zaretskiy in https://github.com/nvidia/cuvs/pull/2022
+* Fix symbol export by @vyasr in https://github.com/nvidia/cuvs/pull/2052
 * fix(ci): add explicit `actions: write` permission for `telemetry-summarize`
- by @gforsyth in https://github.com/rapidsai/cuvs/pull/2075
-* [REVIEW] Improve 1-NN performance with split GEMM/reduction kernels on Blackwell by @vinaydes in https://github.com/rapidsai/cuvs/pull/1768
-* Build and test with CUDA 13.2.0 by @bdice in https://github.com/rapidsai/cuvs/pull/2072
-* Centralize shared utilities across benchmark backends by @jnke2016 in https://github.com/rapidsai/cuvs/pull/2040
-* Persistent CAGRA: benchmark group and bad config warnings by @achirkin in https://github.com/rapidsai/cuvs/pull/2091
-* Multi-GPU Batched KMeans by @viclafargue in https://github.com/rapidsai/cuvs/pull/2017
-* IVF-SQ C API by @viclafargue in https://github.com/rapidsai/cuvs/pull/1910
-* skip CuPy 14.1.0 by @jameslamb in https://github.com/rapidsai/cuvs/pull/2142
+ by @gforsyth in https://github.com/nvidia/cuvs/pull/2075
+* [REVIEW] Improve 1-NN performance with split GEMM/reduction kernels on Blackwell by @vinaydes in https://github.com/nvidia/cuvs/pull/1768
+* Build and test with CUDA 13.2.0 by @bdice in https://github.com/nvidia/cuvs/pull/2072
+* Centralize shared utilities across benchmark backends by @jnke2016 in https://github.com/nvidia/cuvs/pull/2040
+* Persistent CAGRA: benchmark group and bad config warnings by @achirkin in https://github.com/nvidia/cuvs/pull/2091
+* Multi-GPU Batched KMeans by @viclafargue in https://github.com/nvidia/cuvs/pull/2017
+* IVF-SQ C API by @viclafargue in https://github.com/nvidia/cuvs/pull/1910
+* skip CuPy 14.1.0 by @jameslamb in https://github.com/nvidia/cuvs/pull/2142
 
 ## New Contributors
-* @singhmanas1 made their first contribution in https://github.com/rapidsai/cuvs/pull/1300
-* @LizYou made their first contribution in https://github.com/rapidsai/cuvs/pull/1121
-* @jamie8johnson made their first contribution in https://github.com/rapidsai/cuvs/pull/2019
+* @singhmanas1 made their first contribution in https://github.com/nvidia/cuvs/pull/1300
+* @LizYou made their first contribution in https://github.com/nvidia/cuvs/pull/1121
+* @jamie8johnson made their first contribution in https://github.com/nvidia/cuvs/pull/2019
 
-**Full Changelog**: https://github.com/rapidsai/cuvs/compare/v26.06.00a...release/26.06
+**Full Changelog**: https://github.com/nvidia/cuvs/compare/v26.06.00a...release/26.06
 
 # cuvs 26.04.00 (8 Apr 2026)
 
 ### 🚨 Breaking Changes
-* Use HNSW GPU Hierarchy by Default by @julianmi in https://github.com/rapidsai/cuvs/pull/1617
-* Backport "Default to static linking of libcudart" by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/1918
-* enforce a floor on libnvjitlink, build wheels with CUDA 13.0.x, test wheels against mix of CTK versions by @jameslamb in https://github.com/rapidsai/cuvs/pull/1862
+* Use HNSW GPU Hierarchy by Default by @julianmi in https://github.com/nvidia/cuvs/pull/1617
+* Backport "Default to static linking of libcudart" by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/1918
+* enforce a floor on libnvjitlink, build wheels with CUDA 13.0.x, test wheels against mix of CTK versions by @jameslamb in https://github.com/nvidia/cuvs/pull/1862
 ### 🐛 Bug Fixes
-* Add float16 support for CAGRA extend by @lowener in https://github.com/rapidsai/cuvs/pull/1620
-* Fix C examples CUDAToolkit dependency by @tfeher in https://github.com/rapidsai/cuvs/pull/1626
-* clang-tidy: remove AnalyzeTemporaryDtors key by @achirkin in https://github.com/rapidsai/cuvs/pull/1778
-* Fix build.sh: build cuvs_c for examples and fix --gpu-arch parsing by @achirkin in https://github.com/rapidsai/cuvs/pull/1779
-* Better handling of batching of search in MG replicated mode by @viclafargue in https://github.com/rapidsai/cuvs/pull/1718
-* Add nvjitlink to cuda-toolkit pip extras for cusparse compatibility by @bdice in https://github.com/rapidsai/cuvs/pull/1794
-* [REVIEW] Fix: for balanced kmeans use grid.x for adjust_centers to avoid grid.y overflow for >262K centroids by @Nischal1729 in https://github.com/rapidsai/cuvs/pull/1805
-* Fix thrust header by @aamijar in https://github.com/rapidsai/cuvs/pull/1817
-* Fix persistent CAGRA regressions by @achirkin in https://github.com/rapidsai/cuvs/pull/1800
-* Fix thrust header by @aamijar in https://github.com/rapidsai/cuvs/pull/1825
-* Set `cudaFuncAttributeMaxDynamicSharedMemorySize` with thread-safety by @mythrocks in https://github.com/rapidsai/cuvs/pull/1771
-* [REVIEW] Move from `thrust::make_counting_iterator` to `cuda::make_counting_iterator` by @mythrocks in https://github.com/rapidsai/cuvs/pull/1826
-* Fixed cuvs benchmark debug build issue (linker step fail) by @irina-resh-nvda in https://github.com/rapidsai/cuvs/pull/1599
-* Graph degree equals intermediate graph degree bug fix by @irina-resh-nvda in https://github.com/rapidsai/cuvs/pull/1834
-* FAISS patch for `thrust_counting_iterator.h` by @aamijar in https://github.com/rapidsai/cuvs/pull/1844
-* Use 1D grid calculations in `epsilon_neighborhood` by @divyegala in https://github.com/rapidsai/cuvs/pull/1847
-* Fix setting CAGRA graph build algo to iterative search by default by @achirkin in https://github.com/rapidsai/cuvs/pull/1864
-* Faiss suppress warning 611 by @aamijar in https://github.com/rapidsai/cuvs/pull/1879
-* Pin faiss to 1.14.0 by @aamijar in https://github.com/rapidsai/cuvs/pull/1885
-* Make some minor fixes to JIT+LTO functionality by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/1896
-* Revert "Default to static linking of libcudart" by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/1925
-* Fix hanging pytests by @aamijar in https://github.com/rapidsai/cuvs/pull/1924
-* Disallow programmatic stream serialization in JIT kernel launches by @divyegala in https://github.com/rapidsai/cuvs/pull/1932
-* IVF-Flat: fix irrelevant assert in the fused kernel mode by @achirkin in https://github.com/rapidsai/cuvs/pull/1941
-* [REVIEW] cuVS bench: Fix cudaFuncSetAttribute not being called when CAGRA search switches kernel variants by @irina-resh-nvda in https://github.com/rapidsai/cuvs/pull/1851
-* Pin openblas for aarch64 by @tarang-jain in https://github.com/rapidsai/cuvs/pull/1965
-* Null JIT kernel launch config by @divyegala in https://github.com/rapidsai/cuvs/pull/1974
-* [BUG] Fix Vamana Serialization by @tarang-jain in https://github.com/rapidsai/cuvs/pull/1966
+* Add float16 support for CAGRA extend by @lowener in https://github.com/nvidia/cuvs/pull/1620
+* Fix C examples CUDAToolkit dependency by @tfeher in https://github.com/nvidia/cuvs/pull/1626
+* clang-tidy: remove AnalyzeTemporaryDtors key by @achirkin in https://github.com/nvidia/cuvs/pull/1778
+* Fix build.sh: build cuvs_c for examples and fix --gpu-arch parsing by @achirkin in https://github.com/nvidia/cuvs/pull/1779
+* Better handling of batching of search in MG replicated mode by @viclafargue in https://github.com/nvidia/cuvs/pull/1718
+* Add nvjitlink to cuda-toolkit pip extras for cusparse compatibility by @bdice in https://github.com/nvidia/cuvs/pull/1794
+* [REVIEW] Fix: for balanced kmeans use grid.x for adjust_centers to avoid grid.y overflow for >262K centroids by @Nischal1729 in https://github.com/nvidia/cuvs/pull/1805
+* Fix thrust header by @aamijar in https://github.com/nvidia/cuvs/pull/1817
+* Fix persistent CAGRA regressions by @achirkin in https://github.com/nvidia/cuvs/pull/1800
+* Fix thrust header by @aamijar in https://github.com/nvidia/cuvs/pull/1825
+* Set `cudaFuncAttributeMaxDynamicSharedMemorySize` with thread-safety by @mythrocks in https://github.com/nvidia/cuvs/pull/1771
+* [REVIEW] Move from `thrust::make_counting_iterator` to `cuda::make_counting_iterator` by @mythrocks in https://github.com/nvidia/cuvs/pull/1826
+* Fixed cuvs benchmark debug build issue (linker step fail) by @irina-resh-nvda in https://github.com/nvidia/cuvs/pull/1599
+* Graph degree equals intermediate graph degree bug fix by @irina-resh-nvda in https://github.com/nvidia/cuvs/pull/1834
+* FAISS patch for `thrust_counting_iterator.h` by @aamijar in https://github.com/nvidia/cuvs/pull/1844
+* Use 1D grid calculations in `epsilon_neighborhood` by @divyegala in https://github.com/nvidia/cuvs/pull/1847
+* Fix setting CAGRA graph build algo to iterative search by default by @achirkin in https://github.com/nvidia/cuvs/pull/1864
+* Faiss suppress warning 611 by @aamijar in https://github.com/nvidia/cuvs/pull/1879
+* Pin faiss to 1.14.0 by @aamijar in https://github.com/nvidia/cuvs/pull/1885
+* Make some minor fixes to JIT+LTO functionality by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/1896
+* Revert "Default to static linking of libcudart" by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/1925
+* Fix hanging pytests by @aamijar in https://github.com/nvidia/cuvs/pull/1924
+* Disallow programmatic stream serialization in JIT kernel launches by @divyegala in https://github.com/nvidia/cuvs/pull/1932
+* IVF-Flat: fix irrelevant assert in the fused kernel mode by @achirkin in https://github.com/nvidia/cuvs/pull/1941
+* [REVIEW] cuVS bench: Fix cudaFuncSetAttribute not being called when CAGRA search switches kernel variants by @irina-resh-nvda in https://github.com/nvidia/cuvs/pull/1851
+* Pin openblas for aarch64 by @tarang-jain in https://github.com/nvidia/cuvs/pull/1965
+* Null JIT kernel launch config by @divyegala in https://github.com/nvidia/cuvs/pull/1974
+* [BUG] Fix Vamana Serialization by @tarang-jain in https://github.com/nvidia/cuvs/pull/1966
 ### 📖 Documentation
-* remove docs references to cuvs-bench-datasets, install a C compiler in pre-commit env by @jameslamb in https://github.com/rapidsai/cuvs/pull/1736
-* Adding simple tarball install to build and install docs by @cjnolet in https://github.com/rapidsai/cuvs/pull/1868
-* Fixing nccl link in tarball install instructions by @cjnolet in https://github.com/rapidsai/cuvs/pull/1887
-* Add developer and user guides for JIT by @divyegala in https://github.com/rapidsai/cuvs/pull/1876
-* PCA docs by @aamijar in https://github.com/rapidsai/cuvs/pull/1949
-* Fix a few typos and ``.rst`` link syntax by @jrbourbeau in https://github.com/rapidsai/cuvs/pull/1973
-* Update docs footer year by @aamijar in https://github.com/rapidsai/cuvs/pull/1958
-* Doc improvements by @aamijar in https://github.com/rapidsai/cuvs/pull/1978
+* remove docs references to cuvs-bench-datasets, install a C compiler in pre-commit env by @jameslamb in https://github.com/nvidia/cuvs/pull/1736
+* Adding simple tarball install to build and install docs by @cjnolet in https://github.com/nvidia/cuvs/pull/1868
+* Fixing nccl link in tarball install instructions by @cjnolet in https://github.com/nvidia/cuvs/pull/1887
+* Add developer and user guides for JIT by @divyegala in https://github.com/nvidia/cuvs/pull/1876
+* PCA docs by @aamijar in https://github.com/nvidia/cuvs/pull/1949
+* Fix a few typos and ``.rst`` link syntax by @jrbourbeau in https://github.com/nvidia/cuvs/pull/1973
+* Update docs footer year by @aamijar in https://github.com/nvidia/cuvs/pull/1958
+* Doc improvements by @aamijar in https://github.com/nvidia/cuvs/pull/1978
 ### 🚀 New Features
-* JIT compile `interleaved_scan_kernel` for CUDA 13 by @divyegala in https://github.com/rapidsai/cuvs/pull/1405
-* [REVIEW] L1 distance support for iterative search CAGRA build by @yan-zaretskiy in https://github.com/rapidsai/cuvs/pull/1831
-* [FEA] Inertia Computation for Balanced KMeans and Add Option for Weighted Inertia by @tarang-jain in https://github.com/rapidsai/cuvs/pull/1880
-* PCA preprocessor by @aamijar in https://github.com/rapidsai/cuvs/pull/1808
-* [FEA] Add Batching to KMeans by @tarang-jain in https://github.com/rapidsai/cuvs/pull/1886
+* JIT compile `interleaved_scan_kernel` for CUDA 13 by @divyegala in https://github.com/nvidia/cuvs/pull/1405
+* [REVIEW] L1 distance support for iterative search CAGRA build by @yan-zaretskiy in https://github.com/nvidia/cuvs/pull/1831
+* [FEA] Inertia Computation for Balanced KMeans and Add Option for Weighted Inertia by @tarang-jain in https://github.com/nvidia/cuvs/pull/1880
+* PCA preprocessor by @aamijar in https://github.com/nvidia/cuvs/pull/1808
+* [FEA] Add Batching to KMeans by @tarang-jain in https://github.com/nvidia/cuvs/pull/1886
 ### 🛠️ Improvements
-* Automatic Partition Count Derivation for ACE  by @julianmi in https://github.com/rapidsai/cuvs/pull/1603
-* Add filter for cagra::merge by @benfred in https://github.com/rapidsai/cuvs/pull/1496
-* Ivf_flat extends golang APIs by @cpegeric in https://github.com/rapidsai/cuvs/pull/1600
-* Drop Python 3.10 support by @gforsyth in https://github.com/rapidsai/cuvs/pull/1748
-* tighten wheel size limits, expand CI-skipping logic, other small build changes by @jameslamb in https://github.com/rapidsai/cuvs/pull/1751
-* Migrate hash strategy to use the new cuco::static_map by @PointKernel in https://github.com/rapidsai/cuvs/pull/1462
-* Update raft headers by @aamijar in https://github.com/rapidsai/cuvs/pull/1763
-* remove pip.conf migration code in CI scripts, update CI-skipping rules by @jameslamb in https://github.com/rapidsai/cuvs/pull/1760
-* Convert non-type template parameters to runtime parameters in CAGRA search to cut binary size by @seunghwak in https://github.com/rapidsai/cuvs/pull/1498
-* Rename lanczos by @aamijar in https://github.com/rapidsai/cuvs/pull/1759
-* CI: build with CUDA 13.1.1 by @jameslamb in https://github.com/rapidsai/cuvs/pull/1766
-* Fixes for stricter compilers by @maxwbuckley in https://github.com/rapidsai/cuvs/pull/1703
-* fix cpu_search call by including `k` argument by @benfred in https://github.com/rapidsai/cuvs/pull/1785
-* Use GHA id-token for `sccache-dist` auth token by @trxcllnt in https://github.com/rapidsai/cuvs/pull/1790
-* Remove `cagra_optimize.hpp` by @aamijar in https://github.com/rapidsai/cuvs/pull/1791
-* Remove unused use_norms constant from kernel_sm60.cuh by @maxwbuckley in https://github.com/rapidsai/cuvs/pull/1705
-* Remove unused variable read_idx from query loop by @maxwbuckley in https://github.com/rapidsai/cuvs/pull/1706
-* remove gitutils by @jameslamb in https://github.com/rapidsai/cuvs/pull/1797
-* refactor: build wheels and conda packages using Python limited API by @gforsyth in https://github.com/rapidsai/cuvs/pull/1788
-* Add pluggable backend architecture to cuvs-bench by @jnke2016 in https://github.com/rapidsai/cuvs/pull/1536
-* Use Specific CCCL Includes by @divyegala in https://github.com/rapidsai/cuvs/pull/1806
-* Replace `thrust::tuple` with `cuda::std::tuple` by @miscco in https://github.com/rapidsai/cuvs/pull/1811
-* check-nightly-ci: update to new version by @jameslamb in https://github.com/rapidsai/cuvs/pull/1813
-* check-nightly-ci: remove testing config by @jameslamb in https://github.com/rapidsai/cuvs/pull/1824
-* Refactor JIT LTO kernel generation by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/1812
-* Move `test_compute_matrix_product.py` to `cpp/tests` by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/1836
-* Drop uses of `thrust/functional.h` by @miscco in https://github.com/rapidsai/cuvs/pull/1835
-* Better `ncv` param spectral embedding edge case by @aamijar in https://github.com/rapidsai/cuvs/pull/1828
-* Modernize the uses of raft in cuVS by @achirkin in https://github.com/rapidsai/cuvs/pull/1837
+* Automatic Partition Count Derivation for ACE by @julianmi in https://github.com/nvidia/cuvs/pull/1603
+* Add filter for cagra::merge by @benfred in https://github.com/nvidia/cuvs/pull/1496
+* Ivf_flat extends golang APIs by @cpegeric in https://github.com/nvidia/cuvs/pull/1600
+* Drop Python 3.10 support by @gforsyth in https://github.com/nvidia/cuvs/pull/1748
+* tighten wheel size limits, expand CI-skipping logic, other small build changes by @jameslamb in https://github.com/nvidia/cuvs/pull/1751
+* Migrate hash strategy to use the new cuco::static_map by @PointKernel in https://github.com/nvidia/cuvs/pull/1462
+* Update raft headers by @aamijar in https://github.com/nvidia/cuvs/pull/1763
+* remove pip.conf migration code in CI scripts, update CI-skipping rules by @jameslamb in https://github.com/nvidia/cuvs/pull/1760
+* Convert non-type template parameters to runtime parameters in CAGRA search to cut binary size by @seunghwak in https://github.com/nvidia/cuvs/pull/1498
+* Rename lanczos by @aamijar in https://github.com/nvidia/cuvs/pull/1759
+* CI: build with CUDA 13.1.1 by @jameslamb in https://github.com/nvidia/cuvs/pull/1766
+* Fixes for stricter compilers by @maxwbuckley in https://github.com/nvidia/cuvs/pull/1703
+* fix cpu_search call by including `k` argument by @benfred in https://github.com/nvidia/cuvs/pull/1785
+* Use GHA id-token for `sccache-dist` auth token by @trxcllnt in https://github.com/nvidia/cuvs/pull/1790
+* Remove `cagra_optimize.hpp` by @aamijar in https://github.com/nvidia/cuvs/pull/1791
+* Remove unused use_norms constant from kernel_sm60.cuh by @maxwbuckley in https://github.com/nvidia/cuvs/pull/1705
+* Remove unused variable read_idx from query loop by @maxwbuckley in https://github.com/nvidia/cuvs/pull/1706
+* remove gitutils by @jameslamb in https://github.com/nvidia/cuvs/pull/1797
+* refactor: build wheels and conda packages using Python limited API by @gforsyth in https://github.com/nvidia/cuvs/pull/1788
+* Add pluggable backend architecture to cuvs-bench by @jnke2016 in https://github.com/nvidia/cuvs/pull/1536
+* Use Specific CCCL Includes by @divyegala in https://github.com/nvidia/cuvs/pull/1806
+* Replace `thrust::tuple` with `cuda::std::tuple` by @miscco in https://github.com/nvidia/cuvs/pull/1811
+* check-nightly-ci: update to new version by @jameslamb in https://github.com/nvidia/cuvs/pull/1813
+* check-nightly-ci: remove testing config by @jameslamb in https://github.com/nvidia/cuvs/pull/1824
+* Refactor JIT LTO kernel generation by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/1812
+* Move `test_compute_matrix_product.py` to `cpp/tests` by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/1836
+* Drop uses of `thrust/functional.h` by @miscco in https://github.com/nvidia/cuvs/pull/1835
+* Better `ncv` param spectral embedding edge case by @aamijar in https://github.com/nvidia/cuvs/pull/1828
+* Modernize the uses of raft in cuVS by @achirkin in https://github.com/nvidia/cuvs/pull/1837
 * refactor(limited api): add explicit `wheel.py-api` to `pyproject.toml`
- by @gforsyth in https://github.com/rapidsai/cuvs/pull/1852
-* Update Cython lower bound pin to 3.2.2 by @vyasr in https://github.com/rapidsai/cuvs/pull/1858
-* Add support for Python 3.14 by @gforsyth in https://github.com/rapidsai/cuvs/pull/1845
-* fix(rust): change Index::search to take &self instead of self by @zbennett10 in https://github.com/rapidsai/cuvs/pull/1839
-* Remove pytest upper bound pin by @vyasr in https://github.com/rapidsai/cuvs/pull/1867
-* Readme: vecflow paper in references by @aamijar in https://github.com/rapidsai/cuvs/pull/1874
-* Refactor knn graph build params to remove `graph_build_types.hpp` by @jinsolp in https://github.com/rapidsai/cuvs/pull/1235
-* Remove IndexWrapper by @aamijar in https://github.com/rapidsai/cuvs/pull/1792
-* Use C linkage for JIT LTO kernels by @divyegala in https://github.com/rapidsai/cuvs/pull/1909
-* Prevent nested parallelism in HNSW bench by @julianmi in https://github.com/rapidsai/cuvs/pull/1895
-* Add a script to check for breaking C ABI changes by @benfred in https://github.com/rapidsai/cuvs/pull/1749
-* Improve the IVF-PQ Coarse Batch Size Workspace Estimation by @julianmi in https://github.com/rapidsai/cuvs/pull/1937
-* ScaNN: Fix AVQ prefetch by @rmaschal in https://github.com/rapidsai/cuvs/pull/1899
+ by @gforsyth in https://github.com/nvidia/cuvs/pull/1852
+* Update Cython lower bound pin to 3.2.2 by @vyasr in https://github.com/nvidia/cuvs/pull/1858
+* Add support for Python 3.14 by @gforsyth in https://github.com/nvidia/cuvs/pull/1845
+* fix(rust): change Index::search to take &self instead of self by @zbennett10 in https://github.com/nvidia/cuvs/pull/1839
+* Remove pytest upper bound pin by @vyasr in https://github.com/nvidia/cuvs/pull/1867
+* Readme: vecflow paper in references by @aamijar in https://github.com/nvidia/cuvs/pull/1874
+* Refactor knn graph build params to remove `graph_build_types.hpp` by @jinsolp in https://github.com/nvidia/cuvs/pull/1235
+* Remove IndexWrapper by @aamijar in https://github.com/nvidia/cuvs/pull/1792
+* Use C linkage for JIT LTO kernels by @divyegala in https://github.com/nvidia/cuvs/pull/1909
+* Prevent nested parallelism in HNSW bench by @julianmi in https://github.com/nvidia/cuvs/pull/1895
+* Add a script to check for breaking C ABI changes by @benfred in https://github.com/nvidia/cuvs/pull/1749
+* Improve the IVF-PQ Coarse Batch Size Workspace Estimation by @julianmi in https://github.com/nvidia/cuvs/pull/1937
+* ScaNN: Fix AVQ prefetch by @rmaschal in https://github.com/nvidia/cuvs/pull/1899
 
 ## New Contributors
-* @cpegeric made their first contribution in https://github.com/rapidsai/cuvs/pull/1600
-* @PointKernel made their first contribution in https://github.com/rapidsai/cuvs/pull/1462
-* @seunghwak made their first contribution in https://github.com/rapidsai/cuvs/pull/1498
-* @Nischal1729 made their first contribution in https://github.com/rapidsai/cuvs/pull/1805
-* @jnke2016 made their first contribution in https://github.com/rapidsai/cuvs/pull/1536
-* @zbennett10 made their first contribution in https://github.com/rapidsai/cuvs/pull/1839
-* @jrbourbeau made their first contribution in https://github.com/rapidsai/cuvs/pull/1973
+* @cpegeric made their first contribution in https://github.com/nvidia/cuvs/pull/1600
+* @PointKernel made their first contribution in https://github.com/nvidia/cuvs/pull/1462
+* @seunghwak made their first contribution in https://github.com/nvidia/cuvs/pull/1498
+* @Nischal1729 made their first contribution in https://github.com/nvidia/cuvs/pull/1805
+* @jnke2016 made their first contribution in https://github.com/nvidia/cuvs/pull/1536
+* @zbennett10 made their first contribution in https://github.com/nvidia/cuvs/pull/1839
+* @jrbourbeau made their first contribution in https://github.com/nvidia/cuvs/pull/1973
 
-**Full Changelog**: https://github.com/rapidsai/cuvs/compare/v26.04.00a...release/26.04
+**Full Changelog**: https://github.com/nvidia/cuvs/compare/v26.04.00a...release/26.04
 
 # cuvs 26.02.00 (4 Feb 2026)
 
 ### 🚨 Breaking Changes
-* Use CCCL's mdspan implementation by @divyegala in https://github.com/rapidsai/cuvs/pull/1605
-* Add filter for cagra::merge  by @benfred in https://github.com/rapidsai/cuvs/pull/1755
+* Use CCCL's mdspan implementation by @divyegala in https://github.com/nvidia/cuvs/pull/1605
+* Add filter for cagra::merge by @benfred in https://github.com/nvidia/cuvs/pull/1755
 ### 🐛 Bug Fixes
-* fix(ci): remove unknown parameter `name` from rocky8 build job by @gforsyth in https://github.com/rapidsai/cuvs/pull/1554
-* CMake check for FAISS use in benchmarks by @irina-resh-nvda in https://github.com/rapidsai/cuvs/pull/1591
-* Fix overflow in `preprocess_data_kernel` of NN Descent by @jinsolp in https://github.com/rapidsai/cuvs/pull/1596
-* cmake is missing `sparse/gram.cu` gtest by @aamijar in https://github.com/rapidsai/cuvs/pull/1611
-* Correctly specify the ninja-build version to download by @robertmaynard in https://github.com/rapidsai/cuvs/pull/1634
-* Remove Flaky Python ACE Example by @julianmi in https://github.com/rapidsai/cuvs/pull/1641
-* Fix integer overflow in get_free_host_memory() on systems with >2TB RAM by @bdice in https://github.com/rapidsai/cuvs/pull/1636
-* Fix libclang download for Rust, CUDA initialization for C tests by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/1652
-* Skip Python Multi-GPU Doctests by @julianmi in https://github.com/rapidsai/cuvs/pull/1671
-* [Bug] Fix memory allocation test in CAGRA graph optimization by @enp1s0 in https://github.com/rapidsai/cuvs/pull/1675
-* Fix multi-GPU All Neighbors memory coherence issue on older platforms by @viclafargue in https://github.com/rapidsai/cuvs/pull/1713
+* fix(ci): remove unknown parameter `name` from rocky8 build job by @gforsyth in https://github.com/nvidia/cuvs/pull/1554
+* CMake check for FAISS use in benchmarks by @irina-resh-nvda in https://github.com/nvidia/cuvs/pull/1591
+* Fix overflow in `preprocess_data_kernel` of NN Descent by @jinsolp in https://github.com/nvidia/cuvs/pull/1596
+* cmake is missing `sparse/gram.cu` gtest by @aamijar in https://github.com/nvidia/cuvs/pull/1611
+* Correctly specify the ninja-build version to download by @robertmaynard in https://github.com/nvidia/cuvs/pull/1634
+* Remove Flaky Python ACE Example by @julianmi in https://github.com/nvidia/cuvs/pull/1641
+* Fix integer overflow in get_free_host_memory() on systems with >2TB RAM by @bdice in https://github.com/nvidia/cuvs/pull/1636
+* Fix libclang download for Rust, CUDA initialization for C tests by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/1652
+* Skip Python Multi-GPU Doctests by @julianmi in https://github.com/nvidia/cuvs/pull/1671
+* [Bug] Fix memory allocation test in CAGRA graph optimization by @enp1s0 in https://github.com/nvidia/cuvs/pull/1675
+* Fix multi-GPU All Neighbors memory coherence issue on older platforms by @viclafargue in https://github.com/nvidia/cuvs/pull/1713
 ### 📖 Documentation
-* Clean `raft/neighbors` comments by @aamijar in https://github.com/rapidsai/cuvs/pull/1651
-* Clean `raft::neighbors` comments by @aamijar in https://github.com/rapidsai/cuvs/pull/1665
+* Clean `raft/neighbors` comments by @aamijar in https://github.com/nvidia/cuvs/pull/1651
+* Clean `raft::neighbors` comments by @aamijar in https://github.com/nvidia/cuvs/pull/1665
 ### 🚀 New Features
-* Assign the c/ folder to the the c code ownder group by @robertmaynard in https://github.com/rapidsai/cuvs/pull/1573
-* Add arm64 builds to the libcuvs_c rocky8 matrix by @robertmaynard in https://github.com/rapidsai/cuvs/pull/1570
-* [FEA]  Enforce tighter link restrictions on libcuvs_c by @robertmaynard in https://github.com/rapidsai/cuvs/pull/1614
-* [FEA] IVF-PQ Build Factories for Precomputed Centroids and Codebooks by @tarang-jain in https://github.com/rapidsai/cuvs/pull/1483
-* Add HNSW ACE build method by @julianmi in https://github.com/rapidsai/cuvs/pull/1597
-* [FEA] C + Python API for IVF-PQ Build Factories with Precomputed Centroids by @tarang-jain in https://github.com/rapidsai/cuvs/pull/1664
-* Add arm64 builds to the libcuvs_c build.yaml matrix by @robertmaynard in https://github.com/rapidsai/cuvs/pull/1701
-* pre-built libcuvs_c.so now use the new ABI major/minor values by @robertmaynard in https://github.com/rapidsai/cuvs/pull/1708
-* Correct base release for cuvs abi 1 major by @robertmaynard in https://github.com/rapidsai/cuvs/pull/1724
-* Add support for PQ preprocessing API by @lowener in https://github.com/rapidsai/cuvs/pull/1278
-* [FEA] IVF-PQ to Write Flat PQ Codes by @tarang-jain in https://github.com/rapidsai/cuvs/pull/1607
-* Enable collation of all license files into pre-built binaries by @robertmaynard in https://github.com/rapidsai/cuvs/pull/1679
+* Assign the c/ folder to the c code owner group by @robertmaynard in https://github.com/nvidia/cuvs/pull/1573
+* Add arm64 builds to the libcuvs_c rocky8 matrix by @robertmaynard in https://github.com/nvidia/cuvs/pull/1570
+* [FEA] Enforce tighter link restrictions on libcuvs_c by @robertmaynard in https://github.com/nvidia/cuvs/pull/1614
+* [FEA] IVF-PQ Build Factories for Precomputed Centroids and Codebooks by @tarang-jain in https://github.com/nvidia/cuvs/pull/1483
+* Add HNSW ACE build method by @julianmi in https://github.com/nvidia/cuvs/pull/1597
+* [FEA] C + Python API for IVF-PQ Build Factories with Precomputed Centroids by @tarang-jain in https://github.com/nvidia/cuvs/pull/1664
+* Add arm64 builds to the libcuvs_c build.yaml matrix by @robertmaynard in https://github.com/nvidia/cuvs/pull/1701
+* pre-built libcuvs_c.so now use the new ABI major/minor values by @robertmaynard in https://github.com/nvidia/cuvs/pull/1708
+* Correct base release for cuvs abi 1 major by @robertmaynard in https://github.com/nvidia/cuvs/pull/1724
+* Add support for PQ preprocessing API by @lowener in https://github.com/nvidia/cuvs/pull/1278
+* [FEA] IVF-PQ to Write Flat PQ Codes by @tarang-jain in https://github.com/nvidia/cuvs/pull/1607
+* Enable collation of all license files into pre-built binaries by @robertmaynard in https://github.com/nvidia/cuvs/pull/1679
 ### 🛠️ Improvements
-* Update FAISS patch for RMM memory resource header migration by @bdice in https://github.com/rapidsai/cuvs/pull/1566
-* Use strict priority in CI conda tests by @bdice in https://github.com/rapidsai/cuvs/pull/1583
-* Update FAISS from 1.12.0 to 1.13.0 by @bdice in https://github.com/rapidsai/cuvs/pull/1585
-* Use strict priority in CI conda tests by @bdice in https://github.com/rapidsai/cuvs/pull/1606
-* Deduplicate `{unpack/pack}_list_data_kernel` by @jinsolp in https://github.com/rapidsai/cuvs/pull/1609
-* Expose NN Descent fp16 data type support to python by @jinsolp in https://github.com/rapidsai/cuvs/pull/1616
-* Remove alpha specs from non-RAPIDS dependencies by @bdice in https://github.com/rapidsai/cuvs/pull/1618
-* Enable merge barriers by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/1625
-* Add devcontainer fallback for C++ test location by @bdice in https://github.com/rapidsai/cuvs/pull/1631
-* Dispatch to use fp32 distance computation in NN Descent depending on data dimensions by @jinsolp in https://github.com/rapidsai/cuvs/pull/1415
-* Prepare cuvs for removal of deprecated raft apis by @aamijar in https://github.com/rapidsai/cuvs/pull/1610
-* Shorten the Test Duration of test_cagra_ace.py by @julianmi in https://github.com/rapidsai/cuvs/pull/1640
-* Optional seed spectral embedding by @aamijar in https://github.com/rapidsai/cuvs/pull/1639
-* FAISS patch for removed raft headers by @aamijar in https://github.com/rapidsai/cuvs/pull/1654
-* Empty commit to trigger a build by @bdice in https://github.com/rapidsai/cuvs/pull/1662
-* Spectral Clustering dataset api by @aamijar in https://github.com/rapidsai/cuvs/pull/1653
-* Hide kernel symbols  by @divyegala in https://github.com/rapidsai/cuvs/pull/1663
-* Update cuVS to c++20 by @divyegala in https://github.com/rapidsai/cuvs/pull/1649
-* Move faiss_select from raft to cuvs by @aamijar in https://github.com/rapidsai/cuvs/pull/1658
-* Fix DiskANN Override by @tarang-jain in https://github.com/rapidsai/cuvs/pull/1666
-* Use SPDX license identifiers in pyproject.toml, bump build dependency floors by @jameslamb in https://github.com/rapidsai/cuvs/pull/1667
-* Add CUDA 13.1 support by @bdice in https://github.com/rapidsai/cuvs/pull/1642
-* Remove faiss patch by @aamijar in https://github.com/rapidsai/cuvs/pull/1668
-* Expose pq list data to python by @benfred in https://github.com/rapidsai/cuvs/pull/1428
-* Add rust bindings for vamana index by @benfred in https://github.com/rapidsai/cuvs/pull/1608
-* build and test against CUDA 13.1.0 by @jameslamb in https://github.com/rapidsai/cuvs/pull/1677
-* [Improvement] Allow Configurable DRAM in DiskANN SSD Wrapper by @tarang-jain in https://github.com/rapidsai/cuvs/pull/1598
-* prefer CUDA 13.1 devcontainers, react to some cutlass removals in RAFT by @jameslamb in https://github.com/rapidsai/cuvs/pull/1686
-* Empty commit to trigger a build by @jameslamb in https://github.com/rapidsai/cuvs/pull/1694
-* Spectral Embedding `nnz_t` by @aamijar in https://github.com/rapidsai/cuvs/pull/1628
-* Avoid installing CCCL headers in wheels by @divyegala in https://github.com/rapidsai/cuvs/pull/1692
-* Use main shared-workflows branch by @jameslamb in https://github.com/rapidsai/cuvs/pull/1707
-* Add Helper to Create NumPy Files by @julianmi in https://github.com/rapidsai/cuvs/pull/1645
-* Spectral Embedding with `all_neighbors` by @aamijar in https://github.com/rapidsai/cuvs/pull/1693
-* Deduplicate `calc_chunk_indices_kernel` by @jinsolp in https://github.com/rapidsai/cuvs/pull/1657
-* wheel builds: react to changes in pip's handling of build constraints by @mmccarty in https://github.com/rapidsai/cuvs/pull/1710
-* fix(build): build package on merge to `release/*` branch by @gforsyth in https://github.com/rapidsai/cuvs/pull/1733
-* Add a `transform` function to ivf_pq by @benfred in https://github.com/rapidsai/cuvs/pull/1732
+* Update FAISS patch for RMM memory resource header migration by @bdice in https://github.com/nvidia/cuvs/pull/1566
+* Use strict priority in CI conda tests by @bdice in https://github.com/nvidia/cuvs/pull/1583
+* Update FAISS from 1.12.0 to 1.13.0 by @bdice in https://github.com/nvidia/cuvs/pull/1585
+* Use strict priority in CI conda tests by @bdice in https://github.com/nvidia/cuvs/pull/1606
+* Deduplicate `{unpack/pack}_list_data_kernel` by @jinsolp in https://github.com/nvidia/cuvs/pull/1609
+* Expose NN Descent fp16 data type support to python by @jinsolp in https://github.com/nvidia/cuvs/pull/1616
+* Remove alpha specs from non-RAPIDS dependencies by @bdice in https://github.com/nvidia/cuvs/pull/1618
+* Enable merge barriers by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/1625
+* Add devcontainer fallback for C++ test location by @bdice in https://github.com/nvidia/cuvs/pull/1631
+* Dispatch to use fp32 distance computation in NN Descent depending on data dimensions by @jinsolp in https://github.com/nvidia/cuvs/pull/1415
+* Prepare cuvs for removal of deprecated raft apis by @aamijar in https://github.com/nvidia/cuvs/pull/1610
+* Shorten the Test Duration of test_cagra_ace.py by @julianmi in https://github.com/nvidia/cuvs/pull/1640
+* Optional seed spectral embedding by @aamijar in https://github.com/nvidia/cuvs/pull/1639
+* FAISS patch for removed raft headers by @aamijar in https://github.com/nvidia/cuvs/pull/1654
+* Empty commit to trigger a build by @bdice in https://github.com/nvidia/cuvs/pull/1662
+* Spectral Clustering dataset api by @aamijar in https://github.com/nvidia/cuvs/pull/1653
+* Hide kernel symbols by @divyegala in https://github.com/nvidia/cuvs/pull/1663
+* Update cuVS to c++20 by @divyegala in https://github.com/nvidia/cuvs/pull/1649
+* Move faiss_select from raft to cuvs by @aamijar in https://github.com/nvidia/cuvs/pull/1658
+* Fix DiskANN Override by @tarang-jain in https://github.com/nvidia/cuvs/pull/1666
+* Use SPDX license identifiers in pyproject.toml, bump build dependency floors by @jameslamb in https://github.com/nvidia/cuvs/pull/1667
+* Add CUDA 13.1 support by @bdice in https://github.com/nvidia/cuvs/pull/1642
+* Remove faiss patch by @aamijar in https://github.com/nvidia/cuvs/pull/1668
+* Expose pq list data to python by @benfred in https://github.com/nvidia/cuvs/pull/1428
+* Add rust bindings for vamana index by @benfred in https://github.com/nvidia/cuvs/pull/1608
+* build and test against CUDA 13.1.0 by @jameslamb in https://github.com/nvidia/cuvs/pull/1677
+* [Improvement] Allow Configurable DRAM in DiskANN SSD Wrapper by @tarang-jain in https://github.com/nvidia/cuvs/pull/1598
+* prefer CUDA 13.1 devcontainers, react to some cutlass removals in RAFT by @jameslamb in https://github.com/nvidia/cuvs/pull/1686
+* Empty commit to trigger a build by @jameslamb in https://github.com/nvidia/cuvs/pull/1694
+* Spectral Embedding `nnz_t` by @aamijar in https://github.com/nvidia/cuvs/pull/1628
+* Avoid installing CCCL headers in wheels by @divyegala in https://github.com/nvidia/cuvs/pull/1692
+* Use main shared-workflows branch by @jameslamb in https://github.com/nvidia/cuvs/pull/1707
+* Add Helper to Create NumPy Files by @julianmi in https://github.com/nvidia/cuvs/pull/1645
+* Spectral Embedding with `all_neighbors` by @aamijar in https://github.com/nvidia/cuvs/pull/1693
+* Deduplicate `calc_chunk_indices_kernel` by @jinsolp in https://github.com/nvidia/cuvs/pull/1657
+* wheel builds: react to changes in pip's handling of build constraints by @mmccarty in https://github.com/nvidia/cuvs/pull/1710
+* fix(build): build package on merge to `release/*` branch by @gforsyth in https://github.com/nvidia/cuvs/pull/1733
+* Add a `transform` function to ivf_pq by @benfred in https://github.com/nvidia/cuvs/pull/1732
 
 ## New Contributors
-* @irina-resh-nvda made their first contribution in https://github.com/rapidsai/cuvs/pull/1591
+* @irina-resh-nvda made their first contribution in https://github.com/nvidia/cuvs/pull/1591
 
-**Full Changelog**: https://github.com/rapidsai/cuvs/compare/v26.02.00a...release/26.02
+**Full Changelog**: https://github.com/nvidia/cuvs/compare/v26.02.00a...release/26.02
 
 # cuvs 25.12.00 (10 Dec 2025)
 
 ### 🚨 Breaking Changes
-* Using `all_neighbors` for mutual reachability by @jinsolp in https://github.com/rapidsai/cuvs/pull/1234
-* Refactor libcuvs_c header and source locations by @robertmaynard in https://github.com/rapidsai/cuvs/pull/1357
-* Update cagra C API enums to have more long term stable values by @robertmaynard in https://github.com/rapidsai/cuvs/pull/1436
-* Require CUDA 12.2+ by @jakirkham in https://github.com/rapidsai/cuvs/pull/1476
-* Remove mutual_reachability_graph Public API by @tarang-jain in https://github.com/rapidsai/cuvs/pull/1481
+* Using `all_neighbors` for mutual reachability by @jinsolp in https://github.com/nvidia/cuvs/pull/1234
+* Refactor libcuvs_c header and source locations by @robertmaynard in https://github.com/nvidia/cuvs/pull/1357
+* Update cagra C API enums to have more long term stable values by @robertmaynard in https://github.com/nvidia/cuvs/pull/1436
+* Require CUDA 12.2+ by @jakirkham in https://github.com/nvidia/cuvs/pull/1476
+* Remove mutual_reachability_graph Public API by @tarang-jain in https://github.com/nvidia/cuvs/pull/1481
 ### 🐛 Bug Fixes
-* Allow compilation when OpenMP is disabled by @robertmaynard in https://github.com/rapidsai/cuvs/pull/1346
-* Deallocation should be noexcept by @bdice in https://github.com/rapidsai/cuvs/pull/1416
-* ANN_BENCH: Don't throw in noexcept do_deallocate by @achirkin in https://github.com/rapidsai/cuvs/pull/1417
-* Remove unneeded cutlass public build/install dependency by @robertmaynard in https://github.com/rapidsai/cuvs/pull/1411
-* cuvs_static properly adds C include dir to target_include_directories by @robertmaynard in https://github.com/rapidsai/cuvs/pull/1426
-* Fix C/Python serialization for FP16. Add python tests by @lowener in https://github.com/rapidsai/cuvs/pull/1429
-* Properly guard usage of openmp function calls  by @robertmaynard in https://github.com/rapidsai/cuvs/pull/1435
-* Update nlohmann-json to 3.12.0 by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/1449
-* Fix test params for filtered ivf_flat by @aamijar in https://github.com/rapidsai/cuvs/pull/1463
-* [REVIEW][Java][Bug] Fix `CuVSMatrix#getRow` to take strides into consideration by @ldematte in https://github.com/rapidsai/cuvs/pull/1442
-* Adding more tests and I find the compression settings are broken in the Golang API by @maxwbuckley in https://github.com/rapidsai/cuvs/pull/1472
-* Fix binary quantizer host transform bounds and stream order by @achirkin in https://github.com/rapidsai/cuvs/pull/1473
-* [BUG] Check if dynamic batching conf is null by @tarang-jain in https://github.com/rapidsai/cuvs/pull/1459
-* Include cagra search algorithms in libcuvs_static.a by @robertmaynard in https://github.com/rapidsai/cuvs/pull/1457
-* [Java] Disable flaky test `testFloatSerialization` by @mythrocks in https://github.com/rapidsai/cuvs/pull/1503
-* Add libaio to cuvs-bench-cpu, improve bench dependencies by @tarang-jain in https://github.com/rapidsai/cuvs/pull/1480
-* [Java] Fix `CagraBuildAndSearchIT` concurrent deallocation errors by @ldematte in https://github.com/rapidsai/cuvs/pull/1510
-* Pin Cython pre-3.2.0 and PyTest pre-9 by @jakirkham in https://github.com/rapidsai/cuvs/pull/1528
-* Fix a bug in compute_distance_00_generate.py by @enp1s0 in https://github.com/rapidsai/cuvs/pull/1532
-* Check stride information in from_dlpack by @benfred in https://github.com/rapidsai/cuvs/pull/1458
-* refactored update-version.sh to handle new branching strategy by @rockhowse in https://github.com/rapidsai/cuvs/pull/1535
-* fixed bug with update-version.sh by @rockhowse in https://github.com/rapidsai/cuvs/pull/1556
-* fix(ci): remove unsupported `name` parameter from custom job def by @gforsyth in https://github.com/rapidsai/cuvs/pull/1560
-* Remove need to have rapids_logger headers installed to use clib by @robertmaynard in https://github.com/rapidsai/cuvs/pull/1527
-* Fix BruteForce serialize test by @lowener in https://github.com/rapidsai/cuvs/pull/1568
-* [C] Fix: `cuvsRMMMemoryResourceReset` sets a valid resource by @ldematte in https://github.com/rapidsai/cuvs/pull/1540
-* Fix typo in date field by @vyasr in https://github.com/rapidsai/cuvs/pull/1563
-* SNMG ANN build with OpenMP nested parallelism by @viclafargue in https://github.com/rapidsai/cuvs/pull/1526
+* Allow compilation when OpenMP is disabled by @robertmaynard in https://github.com/nvidia/cuvs/pull/1346
+* Deallocation should be noexcept by @bdice in https://github.com/nvidia/cuvs/pull/1416
+* ANN_BENCH: Don't throw in noexcept do_deallocate by @achirkin in https://github.com/nvidia/cuvs/pull/1417
+* Remove unneeded cutlass public build/install dependency by @robertmaynard in https://github.com/nvidia/cuvs/pull/1411
+* cuvs_static properly adds C include dir to target_include_directories by @robertmaynard in https://github.com/nvidia/cuvs/pull/1426
+* Fix C/Python serialization for FP16. Add python tests by @lowener in https://github.com/nvidia/cuvs/pull/1429
+* Properly guard usage of openmp function calls by @robertmaynard in https://github.com/nvidia/cuvs/pull/1435
+* Update nlohmann-json to 3.12.0 by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/1449
+* Fix test params for filtered ivf_flat by @aamijar in https://github.com/nvidia/cuvs/pull/1463
+* [REVIEW][Java][Bug] Fix `CuVSMatrix#getRow` to take strides into consideration by @ldematte in https://github.com/nvidia/cuvs/pull/1442
+* Adding more tests and I find the compression settings are broken in the Golang API by @maxwbuckley in https://github.com/nvidia/cuvs/pull/1472
+* Fix binary quantizer host transform bounds and stream order by @achirkin in https://github.com/nvidia/cuvs/pull/1473
+* [BUG] Check if dynamic batching conf is null by @tarang-jain in https://github.com/nvidia/cuvs/pull/1459
+* Include cagra search algorithms in libcuvs_static.a by @robertmaynard in https://github.com/nvidia/cuvs/pull/1457
+* [Java] Disable flaky test `testFloatSerialization` by @mythrocks in https://github.com/nvidia/cuvs/pull/1503
+* Add libaio to cuvs-bench-cpu, improve bench dependencies by @tarang-jain in https://github.com/nvidia/cuvs/pull/1480
+* [Java] Fix `CagraBuildAndSearchIT` concurrent deallocation errors by @ldematte in https://github.com/nvidia/cuvs/pull/1510
+* Pin Cython pre-3.2.0 and PyTest pre-9 by @jakirkham in https://github.com/nvidia/cuvs/pull/1528
+* Fix a bug in compute_distance_00_generate.py by @enp1s0 in https://github.com/nvidia/cuvs/pull/1532
+* Check stride information in from_dlpack by @benfred in https://github.com/nvidia/cuvs/pull/1458
+* refactored update-version.sh to handle new branching strategy by @rockhowse in https://github.com/nvidia/cuvs/pull/1535
+* fixed bug with update-version.sh by @rockhowse in https://github.com/nvidia/cuvs/pull/1556
+* fix(ci): remove unsupported `name` parameter from custom job def by @gforsyth in https://github.com/nvidia/cuvs/pull/1560
+* Remove need to have rapids_logger headers installed to use clib by @robertmaynard in https://github.com/nvidia/cuvs/pull/1527
+* Fix BruteForce serialize test by @lowener in https://github.com/nvidia/cuvs/pull/1568
+* [C] Fix: `cuvsRMMMemoryResourceReset` sets a valid resource by @ldematte in https://github.com/nvidia/cuvs/pull/1540
+* Fix typo in date field by @vyasr in https://github.com/nvidia/cuvs/pull/1563
+* SNMG ANN build with OpenMP nested parallelism by @viclafargue in https://github.com/nvidia/cuvs/pull/1526
 ### 📖 Documentation
-* Fix table of content documentation by @lowener in https://github.com/rapidsai/cuvs/pull/1427
-* Docs Spectral Clustering by @aamijar in https://github.com/rapidsai/cuvs/pull/1490
-* Use current system architecture in conda environment creation command by @bdice in https://github.com/rapidsai/cuvs/pull/1499
-* Updating README for release by @cjnolet in https://github.com/rapidsai/cuvs/pull/1584
+* Fix table of content documentation by @lowener in https://github.com/nvidia/cuvs/pull/1427
+* Docs Spectral Clustering by @aamijar in https://github.com/nvidia/cuvs/pull/1490
+* Use current system architecture in conda environment creation command by @bdice in https://github.com/nvidia/cuvs/pull/1499
+* Updating README for release by @cjnolet in https://github.com/nvidia/cuvs/pull/1584
 ### 🚀 New Features
-* Unify binding headers by @robertmaynard in https://github.com/rapidsai/cuvs/pull/1413
-* CAGRA: decouple source idx type from graph idx type and add a mapping between them by @achirkin in https://github.com/rapidsai/cuvs/pull/1251
-* Add SOVERSION information to libcuvs_c by @robertmaynard in https://github.com/rapidsai/cuvs/pull/1474
-* Spectral Clustering by @aamijar in https://github.com/rapidsai/cuvs/pull/1425
-* ANN_BENCH: integrate NVTX statistics by @achirkin in https://github.com/rapidsai/cuvs/pull/1529
-* Add Augmented Core Extraction Algorithm by @julianmi in https://github.com/rapidsai/cuvs/pull/1404
-* Extend CI to build and test x86 libcuvs_c tarballs by @robertmaynard in https://github.com/rapidsai/cuvs/pull/1524
-* Introduce `libcuvs-headers` and `libcuvs-static` CMake components and conda packages by @divyegala in https://github.com/rapidsai/cuvs/pull/1494
-* [Java] Bindings, tests and benchmarks for RMM pooled memory by @ldematte in https://github.com/rapidsai/cuvs/pull/1453
+* Unify binding headers by @robertmaynard in https://github.com/nvidia/cuvs/pull/1413
+* CAGRA: decouple source idx type from graph idx type and add a mapping between them by @achirkin in https://github.com/nvidia/cuvs/pull/1251
+* Add SOVERSION information to libcuvs_c by @robertmaynard in https://github.com/nvidia/cuvs/pull/1474
+* Spectral Clustering by @aamijar in https://github.com/nvidia/cuvs/pull/1425
+* ANN_BENCH: integrate NVTX statistics by @achirkin in https://github.com/nvidia/cuvs/pull/1529
+* Add Augmented Core Extraction Algorithm by @julianmi in https://github.com/nvidia/cuvs/pull/1404
+* Extend CI to build and test x86 libcuvs_c tarballs by @robertmaynard in https://github.com/nvidia/cuvs/pull/1524
+* Introduce `libcuvs-headers` and `libcuvs-static` CMake components and conda packages by @divyegala in https://github.com/nvidia/cuvs/pull/1494
+* [Java] Bindings, tests and benchmarks for RMM pooled memory by @ldematte in https://github.com/nvidia/cuvs/pull/1453
 ### 🛠️ Improvements
-* Update `RAPIDS_BRANCH`, codify changes in `update-version.sh` by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/1368
-* Move eigen_solvers from raft by @aamijar in https://github.com/rapidsai/cuvs/pull/1402
-* CosineExpanded Distance Metric for CAGRA by @tarang-jain in https://github.com/rapidsai/cuvs/pull/197
-* Enable `sccache-dist` connection pool by @trxcllnt in https://github.com/rapidsai/cuvs/pull/1431
-* Use pinned_host_memory_resource instead of pinned_memory_resource. by @bdice in https://github.com/rapidsai/cuvs/pull/1434
-* Use main in RAPIDS_BRANCH by @bdice in https://github.com/rapidsai/cuvs/pull/1439
-* Use main shared-workflows branch by @bdice in https://github.com/rapidsai/cuvs/pull/1444
-* [Review] ScaNN: Add option for AVQ/Noise Shaping to bfloat16 quantization by @rmaschal in https://github.com/rapidsai/cuvs/pull/1354
-* Add NVTX annotations to CAGRA knn graph build stage by @achirkin in https://github.com/rapidsai/cuvs/pull/1443
-* [Review][C] Export the ability to get/set the log level to the C API by @ldematte in https://github.com/rapidsai/cuvs/pull/1375
-* [Java] Test indexing and serialization with integral (byte) dataset by @ldematte in https://github.com/rapidsai/cuvs/pull/1366
-* Use SPDX for all copyright headers by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/1446
-* [REVIEW][Java] Log level api by @ldematte in https://github.com/rapidsai/cuvs/pull/1376
-* [Review] [Java] Disable flaky Java tests to reduce CI churn by @mythrocks in https://github.com/rapidsai/cuvs/pull/1469
-* Refactor rapids_cpm_package_details to rapids_cpm_package_info by @bdice in https://github.com/rapidsai/cuvs/pull/1433
-* Fix golang test name and add more logging and error checking. by @maxwbuckley in https://github.com/rapidsai/cuvs/pull/1471
-* Use an explicit std::min for byte alignment calculation by @maxwbuckley in https://github.com/rapidsai/cuvs/pull/1465
-* Replace custom cuda_pinned_resource with RMM's pinned_host_memory_resource by @bdice in https://github.com/rapidsai/cuvs/pull/1466
-* Add patch for FAISS memory resources by @bdice in https://github.com/rapidsai/cuvs/pull/1477
-* Improved CAGRA build parameter heuristics by @achirkin in https://github.com/rapidsai/cuvs/pull/1448
-* Decouple C++ library from C library by @divyegala in https://github.com/rapidsai/cuvs/pull/1488
-* Use `RAPIDS_BRANCH` in cmake-format invocations that need rapids-cmake configs by @bdice in https://github.com/rapidsai/cuvs/pull/1475
-* eigen `tolerance` option Spectral Embedding by @aamijar in https://github.com/rapidsai/cuvs/pull/1493
-* Update to CUDA 13.0.2 by @bdice in https://github.com/rapidsai/cuvs/pull/1506
-* Single Linkage to Use all_neighbors API to build the KNN graph by @tarang-jain in https://github.com/rapidsai/cuvs/pull/1507
-* Migrate to new CCCL memory resource interface by @bdice in https://github.com/rapidsai/cuvs/pull/1502
-* Remove unused headers scann by @lowener in https://github.com/rapidsai/cuvs/pull/1508
-* Update RMM includes from `<rmm/mr/device/*>` to `<rmm/mr/*>` by @bdice in https://github.com/rapidsai/cuvs/pull/1538
-* Use ruff-check, ruff-format instead of black, flake8 by @KyleFromNVIDIA in https://github.com/rapidsai/cuvs/pull/1500
-* Set memory pool from Python multi-GPU resource by @viclafargue in https://github.com/rapidsai/cuvs/pull/1530
-* [Java] Fix format with spotless by @ldematte in https://github.com/rapidsai/cuvs/pull/1539
-* fix bad version update by @trxcllnt in https://github.com/rapidsai/cuvs/pull/1555
-* Deduplicate KMeans instantiations by @divyegala in https://github.com/rapidsai/cuvs/pull/1565
-* Improve memory usage in `build_mr_linkage` by @jinsolp in https://github.com/rapidsai/cuvs/pull/1550
-* Use `sccache-dist` build cluster for conda and wheel builds by @trxcllnt in https://github.com/rapidsai/cuvs/pull/1495
-* [Java] One PinnedMemoryBuffer per CuVSResourcesImpl by @ldematte in https://github.com/rapidsai/cuvs/pull/1441
-* [Java] Relax cuVS version matching by @ldematte in https://github.com/rapidsai/cuvs/pull/1544
+* Update `RAPIDS_BRANCH`, codify changes in `update-version.sh` by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/1368
+* Move eigen_solvers from raft by @aamijar in https://github.com/nvidia/cuvs/pull/1402
+* CosineExpanded Distance Metric for CAGRA by @tarang-jain in https://github.com/nvidia/cuvs/pull/197
+* Enable `sccache-dist` connection pool by @trxcllnt in https://github.com/nvidia/cuvs/pull/1431
+* Use pinned_host_memory_resource instead of pinned_memory_resource. by @bdice in https://github.com/nvidia/cuvs/pull/1434
+* Use main in RAPIDS_BRANCH by @bdice in https://github.com/nvidia/cuvs/pull/1439
+* Use main shared-workflows branch by @bdice in https://github.com/nvidia/cuvs/pull/1444
+* [Review] ScaNN: Add option for AVQ/Noise Shaping to bfloat16 quantization by @rmaschal in https://github.com/nvidia/cuvs/pull/1354
+* Add NVTX annotations to CAGRA knn graph build stage by @achirkin in https://github.com/nvidia/cuvs/pull/1443
+* [Review][C] Export the ability to get/set the log level to the C API by @ldematte in https://github.com/nvidia/cuvs/pull/1375
+* [Java] Test indexing and serialization with integral (byte) dataset by @ldematte in https://github.com/nvidia/cuvs/pull/1366
+* Use SPDX for all copyright headers by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/1446
+* [REVIEW][Java] Log level api by @ldematte in https://github.com/nvidia/cuvs/pull/1376
+* [Review] [Java] Disable flaky Java tests to reduce CI churn by @mythrocks in https://github.com/nvidia/cuvs/pull/1469
+* Refactor rapids_cpm_package_details to rapids_cpm_package_info by @bdice in https://github.com/nvidia/cuvs/pull/1433
+* Fix golang test name and add more logging and error checking. by @maxwbuckley in https://github.com/nvidia/cuvs/pull/1471
+* Use an explicit std::min for byte alignment calculation by @maxwbuckley in https://github.com/nvidia/cuvs/pull/1465
+* Replace custom cuda_pinned_resource with RMM's pinned_host_memory_resource by @bdice in https://github.com/nvidia/cuvs/pull/1466
+* Add patch for FAISS memory resources by @bdice in https://github.com/nvidia/cuvs/pull/1477
+* Improved CAGRA build parameter heuristics by @achirkin in https://github.com/nvidia/cuvs/pull/1448
+* Decouple C++ library from C library by @divyegala in https://github.com/nvidia/cuvs/pull/1488
+* Use `RAPIDS_BRANCH` in cmake-format invocations that need rapids-cmake configs by @bdice in https://github.com/nvidia/cuvs/pull/1475
+* eigen `tolerance` option Spectral Embedding by @aamijar in https://github.com/nvidia/cuvs/pull/1493
+* Update to CUDA 13.0.2 by @bdice in https://github.com/nvidia/cuvs/pull/1506
+* Single Linkage to Use all_neighbors API to build the KNN graph by @tarang-jain in https://github.com/nvidia/cuvs/pull/1507
+* Migrate to new CCCL memory resource interface by @bdice in https://github.com/nvidia/cuvs/pull/1502
+* Remove unused headers scann by @lowener in https://github.com/nvidia/cuvs/pull/1508
+* Update RMM includes from `<rmm/mr/device/*>` to `<rmm/mr/*>` by @bdice in https://github.com/nvidia/cuvs/pull/1538
+* Use ruff-check, ruff-format instead of black, flake8 by @KyleFromNVIDIA in https://github.com/nvidia/cuvs/pull/1500
+* Set memory pool from Python multi-GPU resource by @viclafargue in https://github.com/nvidia/cuvs/pull/1530
+* [Java] Fix format with spotless by @ldematte in https://github.com/nvidia/cuvs/pull/1539
+* fix bad version update by @trxcllnt in https://github.com/nvidia/cuvs/pull/1555
+* Deduplicate KMeans instantiations by @divyegala in https://github.com/nvidia/cuvs/pull/1565
+* Improve memory usage in `build_mr_linkage` by @jinsolp in https://github.com/nvidia/cuvs/pull/1550
+* Use `sccache-dist` build cluster for conda and wheel builds by @trxcllnt in https://github.com/nvidia/cuvs/pull/1495
+* [Java] One PinnedMemoryBuffer per CuVSResourcesImpl by @ldematte in https://github.com/nvidia/cuvs/pull/1441
+* [Java] Relax cuVS version matching by @ldematte in https://github.com/nvidia/cuvs/pull/1544
 
 ## New Contributors
-* @maxwbuckley made their first contribution in https://github.com/rapidsai/cuvs/pull/1471
-* @rockhowse made their first contribution in https://github.com/rapidsai/cuvs/pull/1535
+* @maxwbuckley made their first contribution in https://github.com/nvidia/cuvs/pull/1471
+* @rockhowse made their first contribution in https://github.com/nvidia/cuvs/pull/1535
 
-**Full Changelog**: https://github.com/rapidsai/cuvs/compare/v25.12.00a...release/25.12
+**Full Changelog**: https://github.com/nvidia/cuvs/compare/v25.12.00a...release/25.12
 
 # cuvs 25.10.00 (8 Oct 2025)
 
 ## 🚨 Breaking Changes
 
-- [Java] Uniform toHost/toDevice to work across all CuVSMatrix classes ([#1328](https://github.com/rapidsai/cuvs/pull/1328)) [@ldematte](https://github.com/ldematte)
-- Use int64_t for getters in the cagra/ivf_flat c-api ([#1272](https://github.com/rapidsai/cuvs/pull/1272)) [@benfred](https://github.com/benfred)
-- [REVIEW][Java] Rename destroyIndex() to close(), extend AutoCloseable ([#1252](https://github.com/rapidsai/cuvs/pull/1252)) [@ldematte](https://github.com/ldematte)
-- Removing deprecated batching code specific to NN Descent ([#1249](https://github.com/rapidsai/cuvs/pull/1249)) [@jinsolp](https://github.com/jinsolp)
-- MG C API ([#1160](https://github.com/rapidsai/cuvs/pull/1160)) [@viclafargue](https://github.com/viclafargue)
+- [Java] Uniform toHost/toDevice to work across all CuVSMatrix classes ([#1328](https://github.com/nvidia/cuvs/pull/1328)) [@ldematte](https://github.com/ldematte)
+- Use int64_t for getters in the cagra/ivf_flat c-api ([#1272](https://github.com/nvidia/cuvs/pull/1272)) [@benfred](https://github.com/benfred)
+- [REVIEW][Java] Rename destroyIndex() to close(), extend AutoCloseable ([#1252](https://github.com/nvidia/cuvs/pull/1252)) [@ldematte](https://github.com/ldematte)
+- Removing deprecated batching code specific to NN Descent ([#1249](https://github.com/nvidia/cuvs/pull/1249)) [@jinsolp](https://github.com/jinsolp)
+- MG C API ([#1160](https://github.com/nvidia/cuvs/pull/1160)) [@viclafargue](https://github.com/viclafargue)
 
 ## 🐛 Bug Fixes
 
-- Update `cpp/cmake/config.json` for `ConfigureTest()` ([#1385](https://github.com/rapidsai/cuvs/pull/1385)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Fix k-means++ root rank race condition ([#1359](https://github.com/rapidsai/cuvs/pull/1359)) [@csadorf](https://github.com/csadorf)
-- Fix inadvertent uses of copy constructor in mdarrays across cuVS ([#1330](https://github.com/rapidsai/cuvs/pull/1330)) [@achirkin](https://github.com/achirkin)
-- Refactor CMakeLists to support static only builds of libcuvs ([#1317](https://github.com/rapidsai/cuvs/pull/1317)) [@robertmaynard](https://github.com/robertmaynard)
-- Fix pq_compressed file created by SSD Vamana build ([#1313](https://github.com/rapidsai/cuvs/pull/1313)) [@bkarsin](https://github.com/bkarsin)
-- Revert &quot;[Java]Binary and scalar quantization ([#1104)&quot; (#1274](https://github.com/rapidsai/cuvs/pull/1104)&quot; (#1274)) [@mythrocks](https://github.com/mythrocks)
-- Fix balanced kmeans API ([#1268](https://github.com/rapidsai/cuvs/pull/1268)) [@enp1s0](https://github.com/enp1s0)
-- [BUG] NEIGHBORS_ALL_NEIGHBORS_TEST build ignores --no-mg ([#1230](https://github.com/rapidsai/cuvs/pull/1230)) [@enp1s0](https://github.com/enp1s0)
-- Add error handling for CPU-only bench ([#1203](https://github.com/rapidsai/cuvs/pull/1203)) [@lowener](https://github.com/lowener)
-- Make duplicate removal in all neighbors robust to distance drift across batches ([#1185](https://github.com/rapidsai/cuvs/pull/1185)) [@jinsolp](https://github.com/jinsolp)
-- Fix single GPU sharded search merge ([#1094](https://github.com/rapidsai/cuvs/pull/1094)) [@viclafargue](https://github.com/viclafargue)
-- Processing mutual reachability dist in connect_knn_graph for host data ([#1093](https://github.com/rapidsai/cuvs/pull/1093)) [@jinsolp](https://github.com/jinsolp)
-- [Java][Fix] Multithreaded querying fails without synchronization ([#1082](https://github.com/rapidsai/cuvs/pull/1082)) [@chatman](https://github.com/chatman)
+- Update `cpp/cmake/config.json` for `ConfigureTest()` ([#1385](https://github.com/nvidia/cuvs/pull/1385)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- Fix k-means++ root rank race condition ([#1359](https://github.com/nvidia/cuvs/pull/1359)) [@csadorf](https://github.com/csadorf)
+- Fix inadvertent uses of copy constructor in mdarrays across cuVS ([#1330](https://github.com/nvidia/cuvs/pull/1330)) [@achirkin](https://github.com/achirkin)
+- Refactor CMakeLists to support static only builds of libcuvs ([#1317](https://github.com/nvidia/cuvs/pull/1317)) [@robertmaynard](https://github.com/robertmaynard)
+- Fix pq_compressed file created by SSD Vamana build ([#1313](https://github.com/nvidia/cuvs/pull/1313)) [@bkarsin](https://github.com/bkarsin)
+- Revert "[Java]Binary and scalar quantization (#1104)" ([#1274](https://github.com/nvidia/cuvs/pull/1274)) [@mythrocks](https://github.com/mythrocks)
+- Fix balanced kmeans API ([#1268](https://github.com/nvidia/cuvs/pull/1268)) [@enp1s0](https://github.com/enp1s0)
+- [BUG] NEIGHBORS_ALL_NEIGHBORS_TEST build ignores --no-mg ([#1230](https://github.com/nvidia/cuvs/pull/1230)) [@enp1s0](https://github.com/enp1s0)
+- Add error handling for CPU-only bench ([#1203](https://github.com/nvidia/cuvs/pull/1203)) [@lowener](https://github.com/lowener)
+- Make duplicate removal in all neighbors robust to distance drift across batches ([#1185](https://github.com/nvidia/cuvs/pull/1185)) [@jinsolp](https://github.com/jinsolp)
+- Fix single GPU sharded search merge ([#1094](https://github.com/nvidia/cuvs/pull/1094)) [@viclafargue](https://github.com/viclafargue)
+- Processing mutual reachability dist in connect_knn_graph for host data ([#1093](https://github.com/nvidia/cuvs/pull/1093)) [@jinsolp](https://github.com/jinsolp)
+- [Java][Fix] Multithreaded querying fails without synchronization ([#1082](https://github.com/nvidia/cuvs/pull/1082)) [@chatman](https://github.com/chatman)
 
 ## 📖 Documentation
 
-- Add Vamana C rst Docs to toctree ([#1399](https://github.com/rapidsai/cuvs/pull/1399)) [@tarang-jain](https://github.com/tarang-jain)
-- Regroup overloads in docs ([#1377](https://github.com/rapidsai/cuvs/pull/1377)) [@lowener](https://github.com/lowener)
-- [DOCS] Update IVF Docs with Supported Distance Metrics ([#1365](https://github.com/rapidsai/cuvs/pull/1365)) [@tarang-jain](https://github.com/tarang-jain)
-- Add note about Linux-only support for cuVS pre-compiled packages ([#1306](https://github.com/rapidsai/cuvs/pull/1306)) [@stic](https://github.com/stic)
-- Documentation Fixes ([#1301](https://github.com/rapidsai/cuvs/pull/1301)) [@benfred](https://github.com/benfred)
-- Docs for spectral embedding ([#1299](https://github.com/rapidsai/cuvs/pull/1299)) [@aamijar](https://github.com/aamijar)
-- Improve NN Descent documentation ([#1246](https://github.com/rapidsai/cuvs/pull/1246)) [@jinsolp](https://github.com/jinsolp)
-- Add NN.extend and kmeans to Python doc ([#1217](https://github.com/rapidsai/cuvs/pull/1217)) [@lowener](https://github.com/lowener)
+- Add Vamana C rst Docs to toctree ([#1399](https://github.com/nvidia/cuvs/pull/1399)) [@tarang-jain](https://github.com/tarang-jain)
+- Regroup overloads in docs ([#1377](https://github.com/nvidia/cuvs/pull/1377)) [@lowener](https://github.com/lowener)
+- [DOCS] Update IVF Docs with Supported Distance Metrics ([#1365](https://github.com/nvidia/cuvs/pull/1365)) [@tarang-jain](https://github.com/tarang-jain)
+- Add note about Linux-only support for cuVS pre-compiled packages ([#1306](https://github.com/nvidia/cuvs/pull/1306)) [@stic](https://github.com/stic)
+- Documentation Fixes ([#1301](https://github.com/nvidia/cuvs/pull/1301)) [@benfred](https://github.com/benfred)
+- Docs for spectral embedding ([#1299](https://github.com/nvidia/cuvs/pull/1299)) [@aamijar](https://github.com/aamijar)
+- Improve NN Descent documentation ([#1246](https://github.com/nvidia/cuvs/pull/1246)) [@jinsolp](https://github.com/jinsolp)
+- Add NN.extend and kmeans to Python doc ([#1217](https://github.com/nvidia/cuvs/pull/1217)) [@lowener](https://github.com/lowener)
 
 ## 🚀 New Features
 
-- [REVIEW] [Java] Option to build fat-jars with native dependencies included ([#1296](https://github.com/rapidsai/cuvs/pull/1296)) [@mythrocks](https://github.com/mythrocks)
-- Updates needed when building with CUDA 13 ([#1219](https://github.com/rapidsai/cuvs/pull/1219)) [@robertmaynard](https://github.com/robertmaynard)
-- [Java] Add CAGRA index graph accessor/build from graph (host memory) ([#1216](https://github.com/rapidsai/cuvs/pull/1216)) [@ldematte](https://github.com/ldematte)
-- [Feat] Add Dockerfile for reproducible installation ([#1195](https://github.com/rapidsai/cuvs/pull/1195)) [@rhdong](https://github.com/rhdong)
-- Vamana C / Python API ([#1112](https://github.com/rapidsai/cuvs/pull/1112)) [@tarang-jain](https://github.com/tarang-jain)
-- Mutual Reachability in all-neighbors API ([#1016](https://github.com/rapidsai/cuvs/pull/1016)) [@jinsolp](https://github.com/jinsolp)
-- CAGRA Build + DiskANN Search cuvs-bench Wrapper ([#899](https://github.com/rapidsai/cuvs/pull/899)) [@tarang-jain](https://github.com/tarang-jain)
+- [REVIEW] [Java] Option to build fat-jars with native dependencies included ([#1296](https://github.com/nvidia/cuvs/pull/1296)) [@mythrocks](https://github.com/mythrocks)
+- Updates needed when building with CUDA 13 ([#1219](https://github.com/nvidia/cuvs/pull/1219)) [@robertmaynard](https://github.com/robertmaynard)
+- [Java] Add CAGRA index graph accessor/build from graph (host memory) ([#1216](https://github.com/nvidia/cuvs/pull/1216)) [@ldematte](https://github.com/ldematte)
+- [Feat] Add Dockerfile for reproducible installation ([#1195](https://github.com/nvidia/cuvs/pull/1195)) [@rhdong](https://github.com/rhdong)
+- Vamana C / Python API ([#1112](https://github.com/nvidia/cuvs/pull/1112)) [@tarang-jain](https://github.com/tarang-jain)
+- Mutual Reachability in all-neighbors API ([#1016](https://github.com/nvidia/cuvs/pull/1016)) [@jinsolp](https://github.com/jinsolp)
+- CAGRA Build + DiskANN Search cuvs-bench Wrapper ([#899](https://github.com/nvidia/cuvs/pull/899)) [@tarang-jain](https://github.com/tarang-jain)
 
 ## 🛠️ Improvements
 
-- Patch FAISS for missing Thrust includes ([#1398](https://github.com/rapidsai/cuvs/pull/1398)) [@bdice](https://github.com/bdice)
-- Empty commit to trigger a build ([#1363](https://github.com/rapidsai/cuvs/pull/1363)) [@msarahan](https://github.com/msarahan)
-- [Review][Java] Refactor: extract interface from CuVSMatrixBaseImpl ([#1361](https://github.com/rapidsai/cuvs/pull/1361)) [@ldematte](https://github.com/ldematte)
-- use CUDA 13.0.1 CI images ([#1353](https://github.com/rapidsai/cuvs/pull/1353)) [@jameslamb](https://github.com/jameslamb)
-- Treat warnings as errors in doc builds + MG ANNs Python API doc update ([#1350](https://github.com/rapidsai/cuvs/pull/1350)) [@viclafargue](https://github.com/viclafargue)
-- [Java] Support row strides in CuVSMatrix ([#1345](https://github.com/rapidsai/cuvs/pull/1345)) [@ldematte](https://github.com/ldematte)
-- [Java] Adding tests to use CuVSDeviceMatrix (device memory) directly as a CagraIndex input dataset ([#1340](https://github.com/rapidsai/cuvs/pull/1340)) [@ldematte](https://github.com/ldematte)
-- Improve performance of assigning clusters in batched all_neighbors ([#1336](https://github.com/rapidsai/cuvs/pull/1336)) [@jinsolp](https://github.com/jinsolp)
-- Configure repo for automatic release notes generation ([#1334](https://github.com/rapidsai/cuvs/pull/1334)) [@AyodeAwe](https://github.com/AyodeAwe)
-- [Java] Buffered device matrix builder ([#1332](https://github.com/rapidsai/cuvs/pull/1332)) [@ldematte](https://github.com/ldematte)
-- Remove UCX-Py/UCXX from release script ([#1331](https://github.com/rapidsai/cuvs/pull/1331)) [@pentschev](https://github.com/pentschev)
-- [Java] Uniform toHost/toDevice to work across all CuVSMatrix classes ([#1328](https://github.com/rapidsai/cuvs/pull/1328)) [@ldematte](https://github.com/ldematte)
-- [Java] Add `libcuvs` &lt;-&gt; cuvs-java version check ([#1327](https://github.com/rapidsai/cuvs/pull/1327)) [@ldematte](https://github.com/ldematte)
-- update dependencies: use cuda-toolkit wheels ([#1326](https://github.com/rapidsai/cuvs/pull/1326)) [@jameslamb](https://github.com/jameslamb)
-- Add cagra.extend bindings for python ([#1324](https://github.com/rapidsai/cuvs/pull/1324)) [@benfred](https://github.com/benfred)
-- [Java] Make `cudaGetDeviceProperties` compatible with CUDA 12 and 13 based on symbol presence ([#1323](https://github.com/rapidsai/cuvs/pull/1323)) [@ldematte](https://github.com/ldematte)
-- Use branch-25.10 again ([#1319](https://github.com/rapidsai/cuvs/pull/1319)) [@jameslamb](https://github.com/jameslamb)
-- [Review][Java] Add detailed error message for `libcuvs` load failure to UnsupportedProvider/UnsupportedOperationExceptions ([#1316](https://github.com/rapidsai/cuvs/pull/1316)) [@ldematte](https://github.com/ldematte)
-- [Java] Add reason(s) to UnsupportedProvider/UnsupportedOperationExceptions ([#1314](https://github.com/rapidsai/cuvs/pull/1314)) [@ldematte](https://github.com/ldematte)
-- MG Python API ([#1307](https://github.com/rapidsai/cuvs/pull/1307)) [@viclafargue](https://github.com/viclafargue)
-- Fix redundant memset ([#1305](https://github.com/rapidsai/cuvs/pull/1305)) [@vinaydes](https://github.com/vinaydes)
-- Expose pq-centers to C and Python ([#1303](https://github.com/rapidsai/cuvs/pull/1303)) [@benfred](https://github.com/benfred)
-- Port `raft::neighbors::epsilon_neighborhood` to cuvs ([#1294](https://github.com/rapidsai/cuvs/pull/1294)) [@aamijar](https://github.com/aamijar)
-- ScaNN: Overlapped gather for AVQ ([#1286](https://github.com/rapidsai/cuvs/pull/1286)) [@rmaschal](https://github.com/rmaschal)
-- Update rapids-dependency-file-generator ([#1285](https://github.com/rapidsai/cuvs/pull/1285)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- [Review][Java] Expand `CagraIndex#search` to accept more types (int8 and int32) ([#1283](https://github.com/rapidsai/cuvs/pull/1283)) [@ldematte](https://github.com/ldematte)
-- All neighbors C and Python bindings ([#1282](https://github.com/rapidsai/cuvs/pull/1282)) [@viclafargue](https://github.com/viclafargue)
-- Build and test with CUDA 13.0.0 ([#1273](https://github.com/rapidsai/cuvs/pull/1273)) [@jameslamb](https://github.com/jameslamb)
-- Use int64_t for getters in the cagra/ivf_flat c-api ([#1272](https://github.com/rapidsai/cuvs/pull/1272)) [@benfred](https://github.com/benfred)
-- [Java][C] Expose GPUInfo ([#1267](https://github.com/rapidsai/cuvs/pull/1267)) [@ldematte](https://github.com/ldematte)
-- Add rust bindings for kmeans ([#1266](https://github.com/rapidsai/cuvs/pull/1266)) [@benfred](https://github.com/benfred)
-- [REVIEW] Add a public API for CAGRA graph optimize ([#1260](https://github.com/rapidsai/cuvs/pull/1260)) [@abc99lr](https://github.com/abc99lr)
-- Fix debug build ([#1258](https://github.com/rapidsai/cuvs/pull/1258)) [@lowener](https://github.com/lowener)
-- Run `cargo fmt` in the pre-commit hooks ([#1255](https://github.com/rapidsai/cuvs/pull/1255)) [@benfred](https://github.com/benfred)
-- [REVIEW][Java] Rename destroyIndex() to close(), extend AutoCloseable ([#1252](https://github.com/rapidsai/cuvs/pull/1252)) [@ldematte](https://github.com/ldematte)
-- Removing deprecated batching code specific to NN Descent ([#1249](https://github.com/rapidsai/cuvs/pull/1249)) [@jinsolp](https://github.com/jinsolp)
-- Pin Latest Faiss Version ([#1247](https://github.com/rapidsai/cuvs/pull/1247)) [@tarang-jain](https://github.com/tarang-jain)
-- Use build cluster in devcontainers ([#1240](https://github.com/rapidsai/cuvs/pull/1240)) [@trxcllnt](https://github.com/trxcllnt)
-- Use rapids_cuda_enable_fatbin_compression ([#1239](https://github.com/rapidsai/cuvs/pull/1239)) [@robertmaynard](https://github.com/robertmaynard)
-- [Improvement] Replace Calls to thrust::transform with raft::linalg ([#1238](https://github.com/rapidsai/cuvs/pull/1238)) [@tarang-jain](https://github.com/tarang-jain)
-- [Java] CuVSMatrix for device memory ([#1232](https://github.com/rapidsai/cuvs/pull/1232)) [@ldematte](https://github.com/ldematte)
-- Drop log level for CAGRA trace messages ([#1229](https://github.com/rapidsai/cuvs/pull/1229)) [@mythrocks](https://github.com/mythrocks)
-- Update rapids_config to handle user defined branch name ([#1227](https://github.com/rapidsai/cuvs/pull/1227)) [@robertmaynard](https://github.com/robertmaynard)
-- Reduce noise in Java tests ([#1226](https://github.com/rapidsai/cuvs/pull/1226)) [@mythrocks](https://github.com/mythrocks)
-- Update another instance of rapids-build-backend ([#1225](https://github.com/rapidsai/cuvs/pull/1225)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Fix missing sync_stream in ScaNN build ([#1224](https://github.com/rapidsai/cuvs/pull/1224)) [@rmaschal](https://github.com/rmaschal)
-- [Java] Exception-safe RMM Allocations ([#1215](https://github.com/rapidsai/cuvs/pull/1215)) [@mythrocks](https://github.com/mythrocks)
-- Update rapids-build-backend to 0.4.1 ([#1214](https://github.com/rapidsai/cuvs/pull/1214)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Removing filter templates in ivfpq for binary size reduction ([#1211](https://github.com/rapidsai/cuvs/pull/1211)) [@jinsolp](https://github.com/jinsolp)
-- ci(labeler): update labeler action to [@v5 ([#1210](https://github.com/rapidsai/cuvs/pull/1210)) @gforsyth](https://github.com/v5 ([#1210](https://github.com/rapidsai/cuvs/pull/1210)) @gforsyth)
-- [Java] Add decorator for synchronized CuVSResource access ([#1209](https://github.com/rapidsai/cuvs/pull/1209)) [@ldematte](https://github.com/ldematte)
-- Remove unused templated instances ([#1204](https://github.com/rapidsai/cuvs/pull/1204)) [@vinaydes](https://github.com/vinaydes)
-- Spectral Embedding precomputed graph api ([#1197](https://github.com/rapidsai/cuvs/pull/1197)) [@aamijar](https://github.com/aamijar)
-- cuvs RAPIDS dependency now use the new rapids-cmake branch info ([#1189](https://github.com/rapidsai/cuvs/pull/1189)) [@robertmaynard](https://github.com/robertmaynard)
-- Allow latest OS in devcontainers ([#1169](https://github.com/rapidsai/cuvs/pull/1169)) [@bdice](https://github.com/bdice)
-- MG C API ([#1160](https://github.com/rapidsai/cuvs/pull/1160)) [@viclafargue](https://github.com/viclafargue)
-- Merge support for tiered index ([#1155](https://github.com/rapidsai/cuvs/pull/1155)) [@benfred](https://github.com/benfred)
-- Update build infra to support new branching strategy ([#1143](https://github.com/rapidsai/cuvs/pull/1143)) [@robertmaynard](https://github.com/robertmaynard)
-- [Java]Binary and scalar quantization ([#1104](https://github.com/rapidsai/cuvs/pull/1104)) [@punAhuja](https://github.com/punAhuja)
-- Improve Vamana index build performance and recall ([#1032](https://github.com/rapidsai/cuvs/pull/1032)) [@bkarsin](https://github.com/bkarsin)
-- Use GCC 14 in conda builds. ([#1030](https://github.com/rapidsai/cuvs/pull/1030)) [@vyasr](https://github.com/vyasr)
-- Update SNMG ANN API testing ([#1023](https://github.com/rapidsai/cuvs/pull/1023)) [@viclafargue](https://github.com/viclafargue)
+- Patch FAISS for missing Thrust includes ([#1398](https://github.com/nvidia/cuvs/pull/1398)) [@bdice](https://github.com/bdice)
+- Empty commit to trigger a build ([#1363](https://github.com/nvidia/cuvs/pull/1363)) [@msarahan](https://github.com/msarahan)
+- [Review][Java] Refactor: extract interface from CuVSMatrixBaseImpl ([#1361](https://github.com/nvidia/cuvs/pull/1361)) [@ldematte](https://github.com/ldematte)
+- use CUDA 13.0.1 CI images ([#1353](https://github.com/nvidia/cuvs/pull/1353)) [@jameslamb](https://github.com/jameslamb)
+- Treat warnings as errors in doc builds + MG ANNs Python API doc update ([#1350](https://github.com/nvidia/cuvs/pull/1350)) [@viclafargue](https://github.com/viclafargue)
+- [Java] Support row strides in CuVSMatrix ([#1345](https://github.com/nvidia/cuvs/pull/1345)) [@ldematte](https://github.com/ldematte)
+- [Java] Adding tests to use CuVSDeviceMatrix (device memory) directly as a CagraIndex input dataset ([#1340](https://github.com/nvidia/cuvs/pull/1340)) [@ldematte](https://github.com/ldematte)
+- Improve performance of assigning clusters in batched all_neighbors ([#1336](https://github.com/nvidia/cuvs/pull/1336)) [@jinsolp](https://github.com/jinsolp)
+- Configure repo for automatic release notes generation ([#1334](https://github.com/nvidia/cuvs/pull/1334)) [@AyodeAwe](https://github.com/AyodeAwe)
+- [Java] Buffered device matrix builder ([#1332](https://github.com/nvidia/cuvs/pull/1332)) [@ldematte](https://github.com/ldematte)
+- Remove UCX-Py/UCXX from release script ([#1331](https://github.com/nvidia/cuvs/pull/1331)) [@pentschev](https://github.com/pentschev)
+- [Java] Uniform toHost/toDevice to work across all CuVSMatrix classes ([#1328](https://github.com/nvidia/cuvs/pull/1328)) [@ldematte](https://github.com/ldematte)
+- [Java] Add `libcuvs` &lt;-&gt; cuvs-java version check ([#1327](https://github.com/nvidia/cuvs/pull/1327)) [@ldematte](https://github.com/ldematte)
+- update dependencies: use cuda-toolkit wheels ([#1326](https://github.com/nvidia/cuvs/pull/1326)) [@jameslamb](https://github.com/jameslamb)
+- Add cagra.extend bindings for python ([#1324](https://github.com/nvidia/cuvs/pull/1324)) [@benfred](https://github.com/benfred)
+- [Java] Make `cudaGetDeviceProperties` compatible with CUDA 12 and 13 based on symbol presence ([#1323](https://github.com/nvidia/cuvs/pull/1323)) [@ldematte](https://github.com/ldematte)
+- Use branch-25.10 again ([#1319](https://github.com/nvidia/cuvs/pull/1319)) [@jameslamb](https://github.com/jameslamb)
+- [Review][Java] Add detailed error message for `libcuvs` load failure to UnsupportedProvider/UnsupportedOperationExceptions ([#1316](https://github.com/nvidia/cuvs/pull/1316)) [@ldematte](https://github.com/ldematte)
+- [Java] Add reason(s) to UnsupportedProvider/UnsupportedOperationExceptions ([#1314](https://github.com/nvidia/cuvs/pull/1314)) [@ldematte](https://github.com/ldematte)
+- MG Python API ([#1307](https://github.com/nvidia/cuvs/pull/1307)) [@viclafargue](https://github.com/viclafargue)
+- Fix redundant memset ([#1305](https://github.com/nvidia/cuvs/pull/1305)) [@vinaydes](https://github.com/vinaydes)
+- Expose pq-centers to C and Python ([#1303](https://github.com/nvidia/cuvs/pull/1303)) [@benfred](https://github.com/benfred)
+- Port `raft::neighbors::epsilon_neighborhood` to cuvs ([#1294](https://github.com/nvidia/cuvs/pull/1294)) [@aamijar](https://github.com/aamijar)
+- ScaNN: Overlapped gather for AVQ ([#1286](https://github.com/nvidia/cuvs/pull/1286)) [@rmaschal](https://github.com/rmaschal)
+- Update rapids-dependency-file-generator ([#1285](https://github.com/nvidia/cuvs/pull/1285)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- [Review][Java] Expand `CagraIndex#search` to accept more types (int8 and int32) ([#1283](https://github.com/nvidia/cuvs/pull/1283)) [@ldematte](https://github.com/ldematte)
+- All neighbors C and Python bindings ([#1282](https://github.com/nvidia/cuvs/pull/1282)) [@viclafargue](https://github.com/viclafargue)
+- Build and test with CUDA 13.0.0 ([#1273](https://github.com/nvidia/cuvs/pull/1273)) [@jameslamb](https://github.com/jameslamb)
+- Use int64_t for getters in the cagra/ivf_flat c-api ([#1272](https://github.com/nvidia/cuvs/pull/1272)) [@benfred](https://github.com/benfred)
+- [Java][C] Expose GPUInfo ([#1267](https://github.com/nvidia/cuvs/pull/1267)) [@ldematte](https://github.com/ldematte)
+- Add rust bindings for kmeans ([#1266](https://github.com/nvidia/cuvs/pull/1266)) [@benfred](https://github.com/benfred)
+- [REVIEW] Add a public API for CAGRA graph optimize ([#1260](https://github.com/nvidia/cuvs/pull/1260)) [@abc99lr](https://github.com/abc99lr)
+- Fix debug build ([#1258](https://github.com/nvidia/cuvs/pull/1258)) [@lowener](https://github.com/lowener)
+- Run `cargo fmt` in the pre-commit hooks ([#1255](https://github.com/nvidia/cuvs/pull/1255)) [@benfred](https://github.com/benfred)
+- [REVIEW][Java] Rename destroyIndex() to close(), extend AutoCloseable ([#1252](https://github.com/nvidia/cuvs/pull/1252)) [@ldematte](https://github.com/ldematte)
+- Removing deprecated batching code specific to NN Descent ([#1249](https://github.com/nvidia/cuvs/pull/1249)) [@jinsolp](https://github.com/jinsolp)
+- Pin Latest Faiss Version ([#1247](https://github.com/nvidia/cuvs/pull/1247)) [@tarang-jain](https://github.com/tarang-jain)
+- Use build cluster in devcontainers ([#1240](https://github.com/nvidia/cuvs/pull/1240)) [@trxcllnt](https://github.com/trxcllnt)
+- Use rapids_cuda_enable_fatbin_compression ([#1239](https://github.com/nvidia/cuvs/pull/1239)) [@robertmaynard](https://github.com/robertmaynard)
+- [Improvement] Replace Calls to thrust::transform with raft::linalg ([#1238](https://github.com/nvidia/cuvs/pull/1238)) [@tarang-jain](https://github.com/tarang-jain)
+- [Java] CuVSMatrix for device memory ([#1232](https://github.com/nvidia/cuvs/pull/1232)) [@ldematte](https://github.com/ldematte)
+- Drop log level for CAGRA trace messages ([#1229](https://github.com/nvidia/cuvs/pull/1229)) [@mythrocks](https://github.com/mythrocks)
+- Update rapids_config to handle user defined branch name ([#1227](https://github.com/nvidia/cuvs/pull/1227)) [@robertmaynard](https://github.com/robertmaynard)
+- Reduce noise in Java tests ([#1226](https://github.com/nvidia/cuvs/pull/1226)) [@mythrocks](https://github.com/mythrocks)
+- Update another instance of rapids-build-backend ([#1225](https://github.com/nvidia/cuvs/pull/1225)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- Fix missing sync_stream in ScaNN build ([#1224](https://github.com/nvidia/cuvs/pull/1224)) [@rmaschal](https://github.com/rmaschal)
+- [Java] Exception-safe RMM Allocations ([#1215](https://github.com/nvidia/cuvs/pull/1215)) [@mythrocks](https://github.com/mythrocks)
+- Update rapids-build-backend to 0.4.1 ([#1214](https://github.com/nvidia/cuvs/pull/1214)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- Removing filter templates in ivfpq for binary size reduction ([#1211](https://github.com/nvidia/cuvs/pull/1211)) [@jinsolp](https://github.com/jinsolp)
+- ci(labeler): update labeler action to @v5 ([#1210](https://github.com/nvidia/cuvs/pull/1210)) [@gforsyth](https://github.com/gforsyth)
+- [Java] Add decorator for synchronized CuVSResource access ([#1209](https://github.com/nvidia/cuvs/pull/1209)) [@ldematte](https://github.com/ldematte)
+- Remove unused templated instances ([#1204](https://github.com/nvidia/cuvs/pull/1204)) [@vinaydes](https://github.com/vinaydes)
+- Spectral Embedding precomputed graph api ([#1197](https://github.com/nvidia/cuvs/pull/1197)) [@aamijar](https://github.com/aamijar)
+- cuvs RAPIDS dependency now use the new rapids-cmake branch info ([#1189](https://github.com/nvidia/cuvs/pull/1189)) [@robertmaynard](https://github.com/robertmaynard)
+- Allow latest OS in devcontainers ([#1169](https://github.com/nvidia/cuvs/pull/1169)) [@bdice](https://github.com/bdice)
+- MG C API ([#1160](https://github.com/nvidia/cuvs/pull/1160)) [@viclafargue](https://github.com/viclafargue)
+- Merge support for tiered index ([#1155](https://github.com/nvidia/cuvs/pull/1155)) [@benfred](https://github.com/benfred)
+- Update build infra to support new branching strategy ([#1143](https://github.com/nvidia/cuvs/pull/1143)) [@robertmaynard](https://github.com/robertmaynard)
+- [Java]Binary and scalar quantization ([#1104](https://github.com/nvidia/cuvs/pull/1104)) [@punAhuja](https://github.com/punAhuja)
+- Improve Vamana index build performance and recall ([#1032](https://github.com/nvidia/cuvs/pull/1032)) [@bkarsin](https://github.com/bkarsin)
+- Use GCC 14 in conda builds. ([#1030](https://github.com/nvidia/cuvs/pull/1030)) [@vyasr](https://github.com/vyasr)
+- Update SNMG ANN API testing ([#1023](https://github.com/nvidia/cuvs/pull/1023)) [@viclafargue](https://github.com/viclafargue)
 
 # cuvs 25.08.00 (6 Aug 2025)
 
 ## 🚨 Breaking Changes
 
-- [Java] Using functions for mapping ([#1007](https://github.com/rapidsai/cuvs/pull/1007)) [@ldematte](https://github.com/ldematte)
-- Remove CUDA 11 from dependencies.yaml ([#962](https://github.com/rapidsai/cuvs/pull/962)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Common knn graph build params ([#949](https://github.com/rapidsai/cuvs/pull/949)) [@jinsolp](https://github.com/jinsolp)
-- stop uploading packages to downloads.rapids.ai ([#940](https://github.com/rapidsai/cuvs/pull/940)) [@jameslamb](https://github.com/jameslamb)
+- [Java] Using functions for mapping ([#1007](https://github.com/nvidia/cuvs/pull/1007)) [@ldematte](https://github.com/ldematte)
+- Remove CUDA 11 from dependencies.yaml ([#962](https://github.com/nvidia/cuvs/pull/962)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- Common knn graph build params ([#949](https://github.com/nvidia/cuvs/pull/949)) [@jinsolp](https://github.com/jinsolp)
+- stop uploading packages to downloads.rapids.ai ([#940](https://github.com/nvidia/cuvs/pull/940)) [@jameslamb](https://github.com/jameslamb)
 
 ## 🐛 Bug Fixes
 
-- [BUG] Fix Large Batch Preprocessing for NN Descent ([#1166](https://github.com/rapidsai/cuvs/pull/1166)) [@tarang-jain](https://github.com/tarang-jain)
-- Fix TieredIndexIT,  Scoped resource access change was missed ([#1156](https://github.com/rapidsai/cuvs/pull/1156)) [@punAhuja](https://github.com/punAhuja)
-- Fix for `hellinger` metric ([#1128](https://github.com/rapidsai/cuvs/pull/1128)) [@viclafargue](https://github.com/viclafargue)
-- Fix go CI failures ([#1114](https://github.com/rapidsai/cuvs/pull/1114)) [@benfred](https://github.com/benfred)
-- Conditionally post process distances in NN Descent for use with distance epilogue ([#1108](https://github.com/rapidsai/cuvs/pull/1108)) [@jinsolp](https://github.com/jinsolp)
-- [BUG] Disable flaky cpp test `AnnIVFFlatTestF_half.AnnIVFFlat/21` ([#1092](https://github.com/rapidsai/cuvs/pull/1092)) [@julianmi](https://github.com/julianmi)
-- [BUG] Fix device access check in build_sorted_mst ([#1083](https://github.com/rapidsai/cuvs/pull/1083)) [@tarang-jain](https://github.com/tarang-jain)
-- Pin Faiss Version to Last Stable Release ([#1029](https://github.com/rapidsai/cuvs/pull/1029)) [@tarang-jain](https://github.com/tarang-jain)
-- Fix verify-copyright precommit hook ([#1017](https://github.com/rapidsai/cuvs/pull/1017)) [@benfred](https://github.com/benfred)
-- Adding GH_TOKEN pass-through to summarize job ([#1011](https://github.com/rapidsai/cuvs/pull/1011)) [@msarahan](https://github.com/msarahan)
-- Reuse `&lt;version_config.h&gt;` instead of separate version constants in `c_api.h` ([#1003](https://github.com/rapidsai/cuvs/pull/1003)) [@mythrocks](https://github.com/mythrocks)
-- IVF-PQ coarse search: fix integer overflow and avoid excessive batch sizes ([#999](https://github.com/rapidsai/cuvs/pull/999)) [@achirkin](https://github.com/achirkin)
-- Fix C compile error in `tiered_index.h` ([#996](https://github.com/rapidsai/cuvs/pull/996)) [@mythrocks](https://github.com/mythrocks)
-- Fix for NN Descent negative or NaN distances ([#994](https://github.com/rapidsai/cuvs/pull/994)) [@jinsolp](https://github.com/jinsolp)
-- Adding int64 search for MG CAGRA ([#975](https://github.com/rapidsai/cuvs/pull/975)) [@viclafargue](https://github.com/viclafargue)
-- [ANN_BENCH] Fix OpenMP Sections in Wrappers ([#911](https://github.com/rapidsai/cuvs/pull/911)) [@tarang-jain](https://github.com/tarang-jain)
-- [BUG] cuvs-bench data_export fixes ([#863](https://github.com/rapidsai/cuvs/pull/863)) [@tarang-jain](https://github.com/tarang-jain)
+- [BUG] Fix Large Batch Preprocessing for NN Descent ([#1166](https://github.com/nvidia/cuvs/pull/1166)) [@tarang-jain](https://github.com/tarang-jain)
+- Fix TieredIndexIT,  Scoped resource access change was missed ([#1156](https://github.com/nvidia/cuvs/pull/1156)) [@punAhuja](https://github.com/punAhuja)
+- Fix for `hellinger` metric ([#1128](https://github.com/nvidia/cuvs/pull/1128)) [@viclafargue](https://github.com/viclafargue)
+- Fix go CI failures ([#1114](https://github.com/nvidia/cuvs/pull/1114)) [@benfred](https://github.com/benfred)
+- Conditionally post process distances in NN Descent for use with distance epilogue ([#1108](https://github.com/nvidia/cuvs/pull/1108)) [@jinsolp](https://github.com/jinsolp)
+- [BUG] Disable flaky cpp test `AnnIVFFlatTestF_half.AnnIVFFlat/21` ([#1092](https://github.com/nvidia/cuvs/pull/1092)) [@julianmi](https://github.com/julianmi)
+- [BUG] Fix device access check in build_sorted_mst ([#1083](https://github.com/nvidia/cuvs/pull/1083)) [@tarang-jain](https://github.com/tarang-jain)
+- Pin Faiss Version to Last Stable Release ([#1029](https://github.com/nvidia/cuvs/pull/1029)) [@tarang-jain](https://github.com/tarang-jain)
+- Fix verify-copyright precommit hook ([#1017](https://github.com/nvidia/cuvs/pull/1017)) [@benfred](https://github.com/benfred)
+- Adding GH_TOKEN pass-through to summarize job ([#1011](https://github.com/nvidia/cuvs/pull/1011)) [@msarahan](https://github.com/msarahan)
+- Reuse `&lt;version_config.h&gt;` instead of separate version constants in `c_api.h` ([#1003](https://github.com/nvidia/cuvs/pull/1003)) [@mythrocks](https://github.com/mythrocks)
+- IVF-PQ coarse search: fix integer overflow and avoid excessive batch sizes ([#999](https://github.com/nvidia/cuvs/pull/999)) [@achirkin](https://github.com/achirkin)
+- Fix C compile error in `tiered_index.h` ([#996](https://github.com/nvidia/cuvs/pull/996)) [@mythrocks](https://github.com/mythrocks)
+- Fix for NN Descent negative or NaN distances ([#994](https://github.com/nvidia/cuvs/pull/994)) [@jinsolp](https://github.com/jinsolp)
+- Adding int64 search for MG CAGRA ([#975](https://github.com/nvidia/cuvs/pull/975)) [@viclafargue](https://github.com/viclafargue)
+- [ANN_BENCH] Fix OpenMP Sections in Wrappers ([#911](https://github.com/nvidia/cuvs/pull/911)) [@tarang-jain](https://github.com/tarang-jain)
+- [BUG] cuvs-bench data_export fixes ([#863](https://github.com/nvidia/cuvs/pull/863)) [@tarang-jain](https://github.com/tarang-jain)
 
 ## 📖 Documentation
 
-- [ANN_BENCH] [DOCS] Add Vamana / DiskANN to cuvs-bench Docs ([#1164](https://github.com/rapidsai/cuvs/pull/1164)) [@tarang-jain](https://github.com/tarang-jain)
-- Add Golang build instructions to build.rst ([#1116](https://github.com/rapidsai/cuvs/pull/1116)) [@mythrocks](https://github.com/mythrocks)
-- Minor correction for Java test instructions ([#1081](https://github.com/rapidsai/cuvs/pull/1081)) [@mythrocks](https://github.com/mythrocks)
-- add docs on CI workflow inputs ([#1075](https://github.com/rapidsai/cuvs/pull/1075)) [@jameslamb](https://github.com/jameslamb)
-- [REVIEW][Java] Add instructions on how to run a single test ([#1004](https://github.com/rapidsai/cuvs/pull/1004)) [@ldematte](https://github.com/ldematte)
+- [ANN_BENCH] [DOCS] Add Vamana / DiskANN to cuvs-bench Docs ([#1164](https://github.com/nvidia/cuvs/pull/1164)) [@tarang-jain](https://github.com/tarang-jain)
+- Add Golang build instructions to build.rst ([#1116](https://github.com/nvidia/cuvs/pull/1116)) [@mythrocks](https://github.com/mythrocks)
+- Minor correction for Java test instructions ([#1081](https://github.com/nvidia/cuvs/pull/1081)) [@mythrocks](https://github.com/mythrocks)
+- add docs on CI workflow inputs ([#1075](https://github.com/nvidia/cuvs/pull/1075)) [@jameslamb](https://github.com/jameslamb)
+- [REVIEW][Java] Add instructions on how to run a single test ([#1004](https://github.com/nvidia/cuvs/pull/1004)) [@ldematte](https://github.com/ldematte)
 
 ## 🚀 New Features
 
-- Binary CAGRA with NN Descent ([#1133](https://github.com/rapidsai/cuvs/pull/1133)) [@tarang-jain](https://github.com/tarang-jain)
-- CAGRA build heuristics for HNSW ([#1125](https://github.com/rapidsai/cuvs/pull/1125)) [@achirkin](https://github.com/achirkin)
-- BitwiseHamming distance for NN Descent ([#1101](https://github.com/rapidsai/cuvs/pull/1101)) [@jinsolp](https://github.com/jinsolp)
-- `ReachabilityPostProcess` distance epilogue for NN Descent ([#1073](https://github.com/rapidsai/cuvs/pull/1073)) [@jinsolp](https://github.com/jinsolp)
-- Add brute force to `all_neighbors` API ([#1062](https://github.com/rapidsai/cuvs/pull/1062)) [@jinsolp](https://github.com/jinsolp)
-- [Java] Support for tiered index ([#1028](https://github.com/rapidsai/cuvs/pull/1028)) [@punAhuja](https://github.com/punAhuja)
-- Enable `build_sorted_mst` with data on host memory ([#997](https://github.com/rapidsai/cuvs/pull/997)) [@jinsolp](https://github.com/jinsolp)
-- Add C-API method to fetch cuVS version from Java. ([#935](https://github.com/rapidsai/cuvs/pull/935)) [@mythrocks](https://github.com/mythrocks)
-- Spectral Embedding ([#871](https://github.com/rapidsai/cuvs/pull/871)) [@aamijar](https://github.com/aamijar)
-- [FEA] Build Single Linkage API ([#820](https://github.com/rapidsai/cuvs/pull/820)) [@tarang-jain](https://github.com/tarang-jain)
+- Binary CAGRA with NN Descent ([#1133](https://github.com/nvidia/cuvs/pull/1133)) [@tarang-jain](https://github.com/tarang-jain)
+- CAGRA build heuristics for HNSW ([#1125](https://github.com/nvidia/cuvs/pull/1125)) [@achirkin](https://github.com/achirkin)
+- BitwiseHamming distance for NN Descent ([#1101](https://github.com/nvidia/cuvs/pull/1101)) [@jinsolp](https://github.com/jinsolp)
+- `ReachabilityPostProcess` distance epilogue for NN Descent ([#1073](https://github.com/nvidia/cuvs/pull/1073)) [@jinsolp](https://github.com/jinsolp)
+- Add brute force to `all_neighbors` API ([#1062](https://github.com/nvidia/cuvs/pull/1062)) [@jinsolp](https://github.com/jinsolp)
+- [Java] Support for tiered index ([#1028](https://github.com/nvidia/cuvs/pull/1028)) [@punAhuja](https://github.com/punAhuja)
+- Enable `build_sorted_mst` with data on host memory ([#997](https://github.com/nvidia/cuvs/pull/997)) [@jinsolp](https://github.com/jinsolp)
+- Add C-API method to fetch cuVS version from Java. ([#935](https://github.com/nvidia/cuvs/pull/935)) [@mythrocks](https://github.com/mythrocks)
+- Spectral Embedding ([#871](https://github.com/nvidia/cuvs/pull/871)) [@aamijar](https://github.com/aamijar)
+- [FEA] Build Single Linkage API ([#820](https://github.com/nvidia/cuvs/pull/820)) [@tarang-jain](https://github.com/tarang-jain)
 
 ## 🛠️ Improvements
 
-- Check shape is initialized in cuvsMatrixSliceRows ([#1193](https://github.com/rapidsai/cuvs/pull/1193)) [@benfred](https://github.com/benfred)
-- Add error checking for our C example code ([#1171](https://github.com/rapidsai/cuvs/pull/1171)) [@benfred](https://github.com/benfred)
-- Removing all references to CUDA 11 from codebase ([#1150](https://github.com/rapidsai/cuvs/pull/1150)) [@cjnolet](https://github.com/cjnolet)
-- [Java]Fixed TieredIndexParams creation/destruction, calling cuvsTieredIndexParamsCreate ([#1147](https://github.com/rapidsai/cuvs/pull/1147)) [@punAhuja](https://github.com/punAhuja)
-- Fix update-version ([#1135](https://github.com/rapidsai/cuvs/pull/1135)) [@AyodeAwe](https://github.com/AyodeAwe)
-- Reducing binary size in `ivf_pq` by deduplicating kernels ([#1130](https://github.com/rapidsai/cuvs/pull/1130)) [@jinsolp](https://github.com/jinsolp)
-- fix(docker): use versioned `-latest` tag for all `rapidsai` images ([#1129](https://github.com/rapidsai/cuvs/pull/1129)) [@gforsyth](https://github.com/gforsyth)
-- SCaNN Index build ([#1120](https://github.com/rapidsai/cuvs/pull/1120)) [@rmaschal](https://github.com/rmaschal)
-- [Java] Extend `Dataset` to work as an output data container ([#1111](https://github.com/rapidsai/cuvs/pull/1111)) [@ldematte](https://github.com/ldematte)
-- [Java] Fix HNSW params allocation ([#1110](https://github.com/rapidsai/cuvs/pull/1110)) [@ldematte](https://github.com/ldematte)
-- [Java] Fix CAGRA params allocation ([#1109](https://github.com/rapidsai/cuvs/pull/1109)) [@ldematte](https://github.com/ldematte)
-- [Java] Fix POM ([#1106](https://github.com/rapidsai/cuvs/pull/1106)) [@ldematte](https://github.com/ldematte)
-- Enforce java codeowners for managing contents of `java/` ([#1103](https://github.com/rapidsai/cuvs/pull/1103)) [@cjnolet](https://github.com/cjnolet)
-- Reduce binary size of refine functions ([#1095](https://github.com/rapidsai/cuvs/pull/1095)) [@tfeher](https://github.com/tfeher)
-- [Java] Introduce scoped resource access ([#1089](https://github.com/rapidsai/cuvs/pull/1089)) [@ldematte](https://github.com/ldematte)
-- Expose graph and dataset accessors for CAGRA to C/Python ([#1086](https://github.com/rapidsai/cuvs/pull/1086)) [@benfred](https://github.com/benfred)
-- Add warning for unused GPU when `n_clusters &lt; n_ranks` in batch `all_neighbors` ([#1072](https://github.com/rapidsai/cuvs/pull/1072)) [@jinsolp](https://github.com/jinsolp)
-- [Java] Tidy up `MemorySegment`s lifecycle ([#1069](https://github.com/rapidsai/cuvs/pull/1069)) [@ldematte](https://github.com/ldematte)
-- [Java] Refactor SearchResults implementation classes ([#1067](https://github.com/rapidsai/cuvs/pull/1067)) [@ldematte](https://github.com/ldematte)
-- Use CUDA 12.9 in Conda, Devcontainers, Spark, GHA, etc. ([#1063](https://github.com/rapidsai/cuvs/pull/1063)) [@jakirkham](https://github.com/jakirkham)
-- Exporting changed `graph_build_params` namespace into `all_neighbors` ([#1060](https://github.com/rapidsai/cuvs/pull/1060)) [@jinsolp](https://github.com/jinsolp)
-- hnsw::from_cagra: avoid allocating the graph twice ([#1057](https://github.com/rapidsai/cuvs/pull/1057)) [@achirkin](https://github.com/achirkin)
-- Improve memory efficiency for returning NN Descent distances ([#1053](https://github.com/rapidsai/cuvs/pull/1053)) [@jinsolp](https://github.com/jinsolp)
-- Deprecation notice for batching code specific to NN Descent ([#1052](https://github.com/rapidsai/cuvs/pull/1052)) [@jinsolp](https://github.com/jinsolp)
-- Remove nvidia and dask channels ([#1050](https://github.com/rapidsai/cuvs/pull/1050)) [@vyasr](https://github.com/vyasr)
-- [Java] Enforce Java code format standard ([#1049](https://github.com/rapidsai/cuvs/pull/1049)) [@narangvivek10](https://github.com/narangvivek10)
-- Make rust publish run after conda upload ([#1047](https://github.com/rapidsai/cuvs/pull/1047)) [@AyodeAwe](https://github.com/AyodeAwe)
-- refactor(cuda11): remove cuda11-only conda channels, cleanup docs ([#1046](https://github.com/rapidsai/cuvs/pull/1046)) [@gforsyth](https://github.com/gforsyth)
-- [Review][Java] Fix random segabort/segfault/double free problems ([#1045](https://github.com/rapidsai/cuvs/pull/1045)) [@ldematte](https://github.com/ldematte)
-- [Java] Add Dataset based on `MemorySegment` ([#1034](https://github.com/rapidsai/cuvs/pull/1034)) [@ldematte](https://github.com/ldematte)
-- [Java] Add Java API benchmarks ([#1033](https://github.com/rapidsai/cuvs/pull/1033)) [@ldematte](https://github.com/ldematte)
-- fix(cli): Add validation for search-mode parameter ([#1026](https://github.com/rapidsai/cuvs/pull/1026)) [@mayani-nv](https://github.com/mayani-nv)
-- [Java] Encapsulate on-heap float arrays into `Dataset` ([#1024](https://github.com/rapidsai/cuvs/pull/1024)) [@ldematte](https://github.com/ldematte)
-- refactor(shellcheck): fix all remaining warnings/errors ([#1019](https://github.com/rapidsai/cuvs/pull/1019)) [@gforsyth](https://github.com/gforsyth)
-- Instantiate only specific RAFT linewise kernels ([#1018](https://github.com/rapidsai/cuvs/pull/1018)) [@aamijar](https://github.com/aamijar)
-- Update nightly CI check to allow 30 days of failing nightly build. ([#1008](https://github.com/rapidsai/cuvs/pull/1008)) [@cjnolet](https://github.com/cjnolet)
-- [Java] Using functions for mapping ([#1007](https://github.com/rapidsai/cuvs/pull/1007)) [@ldematte](https://github.com/ldematte)
-- [REVIEW][Java] Refactor CagraBuildAndSearchIT to explicitly express different execution modes ([#1006](https://github.com/rapidsai/cuvs/pull/1006)) [@ldematte](https://github.com/ldematte)
-- [REVIEW][Java] Add jextract artifacts to gitignore ([#1005](https://github.com/rapidsai/cuvs/pull/1005)) [@ldematte](https://github.com/ldematte)
-- Remove pytest pin ([#998](https://github.com/rapidsai/cuvs/pull/998)) [@vyasr](https://github.com/vyasr)
-- [java] Utility function for `cudaMemcpy` ([#983](https://github.com/rapidsai/cuvs/pull/983)) [@mythrocks](https://github.com/mythrocks)
-- [java] Copy `pom.xml` to `target/` on build ([#981](https://github.com/rapidsai/cuvs/pull/981)) [@mythrocks](https://github.com/mythrocks)
-- Remove CUDA 11 from dependencies.yaml ([#962](https://github.com/rapidsai/cuvs/pull/962)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- refactor(rattler): remove cuda11 options and general cleanup ([#961](https://github.com/rapidsai/cuvs/pull/961)) [@gforsyth](https://github.com/gforsyth)
-- Remove CUDA 11 devcontainers and update CI scripts ([#960](https://github.com/rapidsai/cuvs/pull/960)) [@bdice](https://github.com/bdice)
-- Common knn graph build params ([#949](https://github.com/rapidsai/cuvs/pull/949)) [@jinsolp](https://github.com/jinsolp)
-- stop uploading packages to downloads.rapids.ai ([#940](https://github.com/rapidsai/cuvs/pull/940)) [@jameslamb](https://github.com/jameslamb)
-- Instantiate only specific RAFT reduction kernels ([#925](https://github.com/rapidsai/cuvs/pull/925)) [@divyegala](https://github.com/divyegala)
-- [Java] Replace the C wrapper layer with the Panama API-based Java implementation ([#916](https://github.com/rapidsai/cuvs/pull/916)) [@narangvivek10](https://github.com/narangvivek10)
-- Forward-merge branch-25.06 into branch-25.08 ([#909](https://github.com/rapidsai/cuvs/pull/909)) [@gforsyth](https://github.com/gforsyth)
-- Forward-merge branch-25.06 into branch-25.08 ([#897](https://github.com/rapidsai/cuvs/pull/897)) [@gforsyth](https://github.com/gforsyth)
-- Serialize Vamana index with SSD sector alignment per MSFT DiskANN format, generate quantized dataset for integration with DiskANN ([#846](https://github.com/rapidsai/cuvs/pull/846)) [@jamxia155](https://github.com/jamxia155)
-- Add mean and median threshold modes to binary quantization ([#716](https://github.com/rapidsai/cuvs/pull/716)) [@enp1s0](https://github.com/enp1s0)
+- Check shape is initialized in cuvsMatrixSliceRows ([#1193](https://github.com/nvidia/cuvs/pull/1193)) [@benfred](https://github.com/benfred)
+- Add error checking for our C example code ([#1171](https://github.com/nvidia/cuvs/pull/1171)) [@benfred](https://github.com/benfred)
+- Removing all references to CUDA 11 from codebase ([#1150](https://github.com/nvidia/cuvs/pull/1150)) [@cjnolet](https://github.com/cjnolet)
+- [Java] Fixed TieredIndexParams creation/destruction, calling cuvsTieredIndexParamsCreate ([#1147](https://github.com/nvidia/cuvs/pull/1147)) [@punAhuja](https://github.com/punAhuja)
+- Fix update-version ([#1135](https://github.com/nvidia/cuvs/pull/1135)) [@AyodeAwe](https://github.com/AyodeAwe)
+- Reducing binary size in `ivf_pq` by deduplicating kernels ([#1130](https://github.com/nvidia/cuvs/pull/1130)) [@jinsolp](https://github.com/jinsolp)
+- fix(docker): use versioned `-latest` tag for all `rapidsai` images ([#1129](https://github.com/nvidia/cuvs/pull/1129)) [@gforsyth](https://github.com/gforsyth)
+- SCaNN Index build ([#1120](https://github.com/nvidia/cuvs/pull/1120)) [@rmaschal](https://github.com/rmaschal)
+- [Java] Extend `Dataset` to work as an output data container ([#1111](https://github.com/nvidia/cuvs/pull/1111)) [@ldematte](https://github.com/ldematte)
+- [Java] Fix HNSW params allocation ([#1110](https://github.com/nvidia/cuvs/pull/1110)) [@ldematte](https://github.com/ldematte)
+- [Java] Fix CAGRA params allocation ([#1109](https://github.com/nvidia/cuvs/pull/1109)) [@ldematte](https://github.com/ldematte)
+- [Java] Fix POM ([#1106](https://github.com/nvidia/cuvs/pull/1106)) [@ldematte](https://github.com/ldematte)
+- Enforce java codeowners for managing contents of `java/` ([#1103](https://github.com/nvidia/cuvs/pull/1103)) [@cjnolet](https://github.com/cjnolet)
+- Reduce binary size of refine functions ([#1095](https://github.com/nvidia/cuvs/pull/1095)) [@tfeher](https://github.com/tfeher)
+- [Java] Introduce scoped resource access ([#1089](https://github.com/nvidia/cuvs/pull/1089)) [@ldematte](https://github.com/ldematte)
+- Expose graph and dataset accessors for CAGRA to C/Python ([#1086](https://github.com/nvidia/cuvs/pull/1086)) [@benfred](https://github.com/benfred)
+- Add warning for unused GPU when `n_clusters &lt; n_ranks` in batch `all_neighbors` ([#1072](https://github.com/nvidia/cuvs/pull/1072)) [@jinsolp](https://github.com/jinsolp)
+- [Java] Tidy up `MemorySegment`s lifecycle ([#1069](https://github.com/nvidia/cuvs/pull/1069)) [@ldematte](https://github.com/ldematte)
+- [Java] Refactor SearchResults implementation classes ([#1067](https://github.com/nvidia/cuvs/pull/1067)) [@ldematte](https://github.com/ldematte)
+- Use CUDA 12.9 in Conda, Devcontainers, Spark, GHA, etc. ([#1063](https://github.com/nvidia/cuvs/pull/1063)) [@jakirkham](https://github.com/jakirkham)
+- Exporting changed `graph_build_params` namespace into `all_neighbors` ([#1060](https://github.com/nvidia/cuvs/pull/1060)) [@jinsolp](https://github.com/jinsolp)
+- hnsw::from_cagra: avoid allocating the graph twice ([#1057](https://github.com/nvidia/cuvs/pull/1057)) [@achirkin](https://github.com/achirkin)
+- Improve memory efficiency for returning NN Descent distances ([#1053](https://github.com/nvidia/cuvs/pull/1053)) [@jinsolp](https://github.com/jinsolp)
+- Deprecation notice for batching code specific to NN Descent ([#1052](https://github.com/nvidia/cuvs/pull/1052)) [@jinsolp](https://github.com/jinsolp)
+- Remove nvidia and dask channels ([#1050](https://github.com/nvidia/cuvs/pull/1050)) [@vyasr](https://github.com/vyasr)
+- [Java] Enforce Java code format standard ([#1049](https://github.com/nvidia/cuvs/pull/1049)) [@narangvivek10](https://github.com/narangvivek10)
+- Make rust publish run after conda upload ([#1047](https://github.com/nvidia/cuvs/pull/1047)) [@AyodeAwe](https://github.com/AyodeAwe)
+- refactor(cuda11): remove cuda11-only conda channels, cleanup docs ([#1046](https://github.com/nvidia/cuvs/pull/1046)) [@gforsyth](https://github.com/gforsyth)
+- [Review][Java] Fix random segabort/segfault/double free problems ([#1045](https://github.com/nvidia/cuvs/pull/1045)) [@ldematte](https://github.com/ldematte)
+- [Java] Add Dataset based on `MemorySegment` ([#1034](https://github.com/nvidia/cuvs/pull/1034)) [@ldematte](https://github.com/ldematte)
+- [Java] Add Java API benchmarks ([#1033](https://github.com/nvidia/cuvs/pull/1033)) [@ldematte](https://github.com/ldematte)
+- fix(cli): Add validation for search-mode parameter ([#1026](https://github.com/nvidia/cuvs/pull/1026)) [@mayani-nv](https://github.com/mayani-nv)
+- [Java] Encapsulate on-heap float arrays into `Dataset` ([#1024](https://github.com/nvidia/cuvs/pull/1024)) [@ldematte](https://github.com/ldematte)
+- refactor(shellcheck): fix all remaining warnings/errors ([#1019](https://github.com/nvidia/cuvs/pull/1019)) [@gforsyth](https://github.com/gforsyth)
+- Instantiate only specific RAFT linewise kernels ([#1018](https://github.com/nvidia/cuvs/pull/1018)) [@aamijar](https://github.com/aamijar)
+- Update nightly CI check to allow 30 days of failing nightly build. ([#1008](https://github.com/nvidia/cuvs/pull/1008)) [@cjnolet](https://github.com/cjnolet)
+- [Java] Using functions for mapping ([#1007](https://github.com/nvidia/cuvs/pull/1007)) [@ldematte](https://github.com/ldematte)
+- [REVIEW][Java] Refactor CagraBuildAndSearchIT to explicitly express different execution modes ([#1006](https://github.com/nvidia/cuvs/pull/1006)) [@ldematte](https://github.com/ldematte)
+- [REVIEW][Java] Add jextract artifacts to gitignore ([#1005](https://github.com/nvidia/cuvs/pull/1005)) [@ldematte](https://github.com/ldematte)
+- Remove pytest pin ([#998](https://github.com/nvidia/cuvs/pull/998)) [@vyasr](https://github.com/vyasr)
+- [java] Utility function for `cudaMemcpy` ([#983](https://github.com/nvidia/cuvs/pull/983)) [@mythrocks](https://github.com/mythrocks)
+- [java] Copy `pom.xml` to `target/` on build ([#981](https://github.com/nvidia/cuvs/pull/981)) [@mythrocks](https://github.com/mythrocks)
+- Remove CUDA 11 from dependencies.yaml ([#962](https://github.com/nvidia/cuvs/pull/962)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- refactor(rattler): remove cuda11 options and general cleanup ([#961](https://github.com/nvidia/cuvs/pull/961)) [@gforsyth](https://github.com/gforsyth)
+- Remove CUDA 11 devcontainers and update CI scripts ([#960](https://github.com/nvidia/cuvs/pull/960)) [@bdice](https://github.com/bdice)
+- Common knn graph build params ([#949](https://github.com/nvidia/cuvs/pull/949)) [@jinsolp](https://github.com/jinsolp)
+- stop uploading packages to downloads.rapids.ai ([#940](https://github.com/nvidia/cuvs/pull/940)) [@jameslamb](https://github.com/jameslamb)
+- Instantiate only specific RAFT reduction kernels ([#925](https://github.com/nvidia/cuvs/pull/925)) [@divyegala](https://github.com/divyegala)
+- [Java] Replace the C wrapper layer with the Panama API-based Java implementation ([#916](https://github.com/nvidia/cuvs/pull/916)) [@narangvivek10](https://github.com/narangvivek10)
+- Forward-merge branch-25.06 into branch-25.08 ([#909](https://github.com/nvidia/cuvs/pull/909)) [@gforsyth](https://github.com/gforsyth)
+- Forward-merge branch-25.06 into branch-25.08 ([#897](https://github.com/nvidia/cuvs/pull/897)) [@gforsyth](https://github.com/gforsyth)
+- Serialize Vamana index with SSD sector alignment per MSFT DiskANN format, generate quantized dataset for integration with DiskANN ([#846](https://github.com/nvidia/cuvs/pull/846)) [@jamxia155](https://github.com/jamxia155)
+- Add mean and median threshold modes to binary quantization ([#716](https://github.com/nvidia/cuvs/pull/716)) [@enp1s0](https://github.com/enp1s0)
 
 # cuvs 25.06.00 (5 Jun 2025)
 
 ## 🚨 Breaking Changes
 
-- [Java] Adding support for prefiltering in CAGRA ([#870](https://github.com/rapidsai/cuvs/pull/870)) [@punAhuja](https://github.com/punAhuja)
+- [Java] Adding support for prefiltering in CAGRA ([#870](https://github.com/nvidia/cuvs/pull/870)) [@punAhuja](https://github.com/punAhuja)
 
 ## 🐛 Bug Fixes
 
-- Fix failing CAGRA merge google tests for 25.06 ([#974](https://github.com/rapidsai/cuvs/pull/974)) [@rhdong](https://github.com/rhdong)
-- Fix for recent NCCL resource update ([#968](https://github.com/rapidsai/cuvs/pull/968)) [@viclafargue](https://github.com/viclafargue)
-- Revert &quot;Fix kmeans::predict argument order ([#915)&quot; (#951](https://github.com/rapidsai/cuvs/pull/915)&quot; (#951)) [@divyegala](https://github.com/divyegala)
-- IVF-PQ tests: fix segfault when accessing empty lists ([#933](https://github.com/rapidsai/cuvs/pull/933)) [@achirkin](https://github.com/achirkin)
-- batch_load_iterator shall copy data located in host memory ([#926](https://github.com/rapidsai/cuvs/pull/926)) [@tfeher](https://github.com/tfeher)
-- Fix kmeans::predict argument order ([#915](https://github.com/rapidsai/cuvs/pull/915)) [@enp1s0](https://github.com/enp1s0)
-- ANN_BENCH: fix the reported core count ([#896](https://github.com/rapidsai/cuvs/pull/896)) [@achirkin](https://github.com/achirkin)
-- CUVS_ANN_BENCH_SINGLE_EXE: find the mg library ([#890](https://github.com/rapidsai/cuvs/pull/890)) [@achirkin](https://github.com/achirkin)
-- [FIX] Revert negated InnerProduct distance for NN Descent ([#859](https://github.com/rapidsai/cuvs/pull/859)) [@jinsolp](https://github.com/jinsolp)
-- IVF-PQ tests: fix segfault when accessing empty lists ([#838](https://github.com/rapidsai/cuvs/pull/838)) [@achirkin](https://github.com/achirkin)
-- [CuVS-Java] Automate panama bindings generation, Include IVF_PQ parameters in CAGRA index parameters and other changes ([#831](https://github.com/rapidsai/cuvs/pull/831)) [@narangvivek10](https://github.com/narangvivek10)
-- Ann-bench: fix unsafe lazy blobs ([#828](https://github.com/rapidsai/cuvs/pull/828)) [@achirkin](https://github.com/achirkin)
-- Fix test_brute_force conversion ([#821](https://github.com/rapidsai/cuvs/pull/821)) [@lowener](https://github.com/lowener)
-- [FEA] New algos and updates/corrections to Faiss cuvs-bench ([#677](https://github.com/rapidsai/cuvs/pull/677)) [@tarang-jain](https://github.com/tarang-jain)
+- Fix failing CAGRA merge google tests for 25.06 ([#974](https://github.com/nvidia/cuvs/pull/974)) [@rhdong](https://github.com/rhdong)
+- Fix for recent NCCL resource update ([#968](https://github.com/nvidia/cuvs/pull/968)) [@viclafargue](https://github.com/viclafargue)
+- Revert &quot;Fix kmeans::predict argument order (#915)&quot; ([#951](https://github.com/nvidia/cuvs/pull/951)) [@divyegala](https://github.com/divyegala)
+- IVF-PQ tests: fix segfault when accessing empty lists ([#933](https://github.com/nvidia/cuvs/pull/933)) [@achirkin](https://github.com/achirkin)
+- batch_load_iterator shall copy data located in host memory ([#926](https://github.com/nvidia/cuvs/pull/926)) [@tfeher](https://github.com/tfeher)
+- Fix kmeans::predict argument order ([#915](https://github.com/nvidia/cuvs/pull/915)) [@enp1s0](https://github.com/enp1s0)
+- ANN_BENCH: fix the reported core count ([#896](https://github.com/nvidia/cuvs/pull/896)) [@achirkin](https://github.com/achirkin)
+- CUVS_ANN_BENCH_SINGLE_EXE: find the mg library ([#890](https://github.com/nvidia/cuvs/pull/890)) [@achirkin](https://github.com/achirkin)
+- [FIX] Revert negated InnerProduct distance for NN Descent ([#859](https://github.com/nvidia/cuvs/pull/859)) [@jinsolp](https://github.com/jinsolp)
+- IVF-PQ tests: fix segfault when accessing empty lists ([#838](https://github.com/nvidia/cuvs/pull/838)) [@achirkin](https://github.com/achirkin)
+- [CuVS-Java] Automate panama bindings generation, Include IVF_PQ parameters in CAGRA index parameters and other changes ([#831](https://github.com/nvidia/cuvs/pull/831)) [@narangvivek10](https://github.com/narangvivek10)
+- Ann-bench: fix unsafe lazy blobs ([#828](https://github.com/nvidia/cuvs/pull/828)) [@achirkin](https://github.com/achirkin)
+- Fix test_brute_force conversion ([#821](https://github.com/nvidia/cuvs/pull/821)) [@lowener](https://github.com/lowener)
+- [FEA] New algos and updates/corrections to Faiss cuvs-bench ([#677](https://github.com/nvidia/cuvs/pull/677)) [@tarang-jain](https://github.com/tarang-jain)
 
 ## 📖 Documentation
 
-- All-neighbors API docs ([#944](https://github.com/rapidsai/cuvs/pull/944)) [@jinsolp](https://github.com/jinsolp)
-- cagra.rst: fitered -&gt; filtered ([#866](https://github.com/rapidsai/cuvs/pull/866)) [@eli-b](https://github.com/eli-b)
-- [DOCS] Fix Integration Docs (Faiss) ([#782](https://github.com/rapidsai/cuvs/pull/782)) [@tarang-jain](https://github.com/tarang-jain)
+- All-neighbors API docs ([#944](https://github.com/nvidia/cuvs/pull/944)) [@jinsolp](https://github.com/jinsolp)
+- cagra.rst: fitered -&gt; filtered ([#866](https://github.com/nvidia/cuvs/pull/866)) [@eli-b](https://github.com/eli-b)
+- [DOCS] Fix Integration Docs (Faiss) ([#782](https://github.com/nvidia/cuvs/pull/782)) [@tarang-jain](https://github.com/tarang-jain)
 
 ## 🚀 New Features
 
-- Add IVF-PQ support in CAGRA index params in Python ([#918](https://github.com/rapidsai/cuvs/pull/918)) [@lowener](https://github.com/lowener)
-- [Feat] Expose C API for CAGRA `merge` ([#860](https://github.com/rapidsai/cuvs/pull/860)) [@rhdong](https://github.com/rhdong)
-- Use NCCL wheels from PyPI for CUDA 12 builds ([#827](https://github.com/rapidsai/cuvs/pull/827)) [@divyegala](https://github.com/divyegala)
-- Add support for half in CAGRA+HNSW ([#813](https://github.com/rapidsai/cuvs/pull/813)) [@lowener](https://github.com/lowener)
-- [CI] Enable Java test in CI workflow ([#805](https://github.com/rapidsai/cuvs/pull/805)) [@rhdong](https://github.com/rhdong)
-- Wrapper for all-neighbors knn graph building ([#785](https://github.com/rapidsai/cuvs/pull/785)) [@jinsolp](https://github.com/jinsolp)
-- Add support of half dtype in IVF-FLAT ([#730](https://github.com/rapidsai/cuvs/pull/730)) [@lowener](https://github.com/lowener)
-- IVF-PQ: low-precision coarse search ([#715](https://github.com/rapidsai/cuvs/pull/715)) [@achirkin](https://github.com/achirkin)
-- [Feat] Add support of logical merge in Cagra ([#713](https://github.com/rapidsai/cuvs/pull/713)) [@rhdong](https://github.com/rhdong)
+- Add IVF-PQ support in CAGRA index params in Python ([#918](https://github.com/nvidia/cuvs/pull/918)) [@lowener](https://github.com/lowener)
+- [Feat] Expose C API for CAGRA `merge` ([#860](https://github.com/nvidia/cuvs/pull/860)) [@rhdong](https://github.com/rhdong)
+- Use NCCL wheels from PyPI for CUDA 12 builds ([#827](https://github.com/nvidia/cuvs/pull/827)) [@divyegala](https://github.com/divyegala)
+- Add support for half in CAGRA+HNSW ([#813](https://github.com/nvidia/cuvs/pull/813)) [@lowener](https://github.com/lowener)
+- [CI] Enable Java test in CI workflow ([#805](https://github.com/nvidia/cuvs/pull/805)) [@rhdong](https://github.com/rhdong)
+- Wrapper for all-neighbors knn graph building ([#785](https://github.com/nvidia/cuvs/pull/785)) [@jinsolp](https://github.com/jinsolp)
+- Add support of half dtype in IVF-FLAT ([#730](https://github.com/nvidia/cuvs/pull/730)) [@lowener](https://github.com/lowener)
+- IVF-PQ: low-precision coarse search ([#715](https://github.com/nvidia/cuvs/pull/715)) [@achirkin](https://github.com/achirkin)
+- [Feat] Add support of logical merge in Cagra ([#713](https://github.com/nvidia/cuvs/pull/713)) [@rhdong](https://github.com/rhdong)
 
 ## 🛠️ Improvements
 
-- Update score calculation for CAGRA-Q instance selection ([#938](https://github.com/rapidsai/cuvs/pull/938)) [@enp1s0](https://github.com/enp1s0)
-- [FEA] Use Native Brute Force for Sparse Pairwise KNN ([#927](https://github.com/rapidsai/cuvs/pull/927)) [@tarang-jain](https://github.com/tarang-jain)
-- use &#39;rapids-init-pip&#39; in wheel CI, other CI changes ([#917](https://github.com/rapidsai/cuvs/pull/917)) [@jameslamb](https://github.com/jameslamb)
-- ANN_BENCH: Avoid repeated calls to raft::get_device_for_address in CAGRA search ([#908](https://github.com/rapidsai/cuvs/pull/908)) [@achirkin](https://github.com/achirkin)
-- [Java] New off-heap Dataset support for CAGRA and Bruteforce ([#902](https://github.com/rapidsai/cuvs/pull/902)) [@chatman](https://github.com/chatman)
-- Finish CUDA 12.9 migration and use branch-25.06 workflows ([#901](https://github.com/rapidsai/cuvs/pull/901)) [@bdice](https://github.com/bdice)
-- Update to clang 20 ([#898](https://github.com/rapidsai/cuvs/pull/898)) [@bdice](https://github.com/bdice)
-- get Java artifacts from GitHub Actions artifact store ([#893](https://github.com/rapidsai/cuvs/pull/893)) [@jameslamb](https://github.com/jameslamb)
-- Quote head_rev in conda recipes ([#892](https://github.com/rapidsai/cuvs/pull/892)) [@bdice](https://github.com/bdice)
-- [Java] Exposing  merge API for multiple CAGRA indices ([#891](https://github.com/rapidsai/cuvs/pull/891)) [@punAhuja](https://github.com/punAhuja)
-- Expose ivf-flat centers to python/c ([#888](https://github.com/rapidsai/cuvs/pull/888)) [@benfred](https://github.com/benfred)
-- CUDA 12.9 use updated compression flags ([#887](https://github.com/rapidsai/cuvs/pull/887)) [@robertmaynard](https://github.com/robertmaynard)
-- Expose ivf-pq centers to python/c ([#881](https://github.com/rapidsai/cuvs/pull/881)) [@benfred](https://github.com/benfred)
-- Accept host inputs in python for ivf-pq build and extend ([#880](https://github.com/rapidsai/cuvs/pull/880)) [@benfred](https://github.com/benfred)
-- Add tiered_index support ([#879](https://github.com/rapidsai/cuvs/pull/879)) [@benfred](https://github.com/benfred)
-- Exclude librmm.so from auditwheel ([#878](https://github.com/rapidsai/cuvs/pull/878)) [@bdice](https://github.com/bdice)
-- update.version.sh: remove broken reference, skip most CI on PRs that only modify update-version.sh ([#875](https://github.com/rapidsai/cuvs/pull/875)) [@jameslamb](https://github.com/jameslamb)
-- Add support for Python 3.13 ([#874](https://github.com/rapidsai/cuvs/pull/874)) [@gforsyth](https://github.com/gforsyth)
-- chore: lower wheel size threshold ([#872](https://github.com/rapidsai/cuvs/pull/872)) [@gforsyth](https://github.com/gforsyth)
-- [Java] Adding support for prefiltering in CAGRA ([#870](https://github.com/rapidsai/cuvs/pull/870)) [@punAhuja](https://github.com/punAhuja)
-- Change snmg index to use updated multi gpu resource API ([#869](https://github.com/rapidsai/cuvs/pull/869)) [@jinsolp](https://github.com/jinsolp)
-- run shellcheck on all files, other small pre-commit updates ([#865](https://github.com/rapidsai/cuvs/pull/865)) [@jameslamb](https://github.com/jameslamb)
-- Fix IVF PQ build metric for CAGRA ([#862](https://github.com/rapidsai/cuvs/pull/862)) [@lowener](https://github.com/lowener)
-- ANN_BENCH: Expose parallel_mode parameter of FAISS CPU IVF implementation ([#861](https://github.com/rapidsai/cuvs/pull/861)) [@achirkin](https://github.com/achirkin)
-- Specify matplotlib version ([#839](https://github.com/rapidsai/cuvs/pull/839)) [@benfred](https://github.com/benfred)
-- Use random tmp names for index files in tests ([#837](https://github.com/rapidsai/cuvs/pull/837)) [@achirkin](https://github.com/achirkin)
-- Download build artifacts from Github for CI ([#834](https://github.com/rapidsai/cuvs/pull/834)) [@VenkateshJaya](https://github.com/VenkateshJaya)
-- Add NN-Descent return_distances functionality to python/C ([#833](https://github.com/rapidsai/cuvs/pull/833)) [@benfred](https://github.com/benfred)
-- Optimize hnsw::from_cagra&lt;GPU&gt; ([#826](https://github.com/rapidsai/cuvs/pull/826)) [@achirkin](https://github.com/achirkin)
-- Use vendored RAPIDS.cmake in example code. ([#824](https://github.com/rapidsai/cuvs/pull/824)) [@bdice](https://github.com/bdice)
-- refactor(rattler): enable strict channel priority for builds ([#823](https://github.com/rapidsai/cuvs/pull/823)) [@gforsyth](https://github.com/gforsyth)
-- Reduce device memory usage for CAGRA&#39;s graph optimization process (2-hop detour counting) ([#822](https://github.com/rapidsai/cuvs/pull/822)) [@anaruse](https://github.com/anaruse)
-- [cuvs_bench] distinguish search label from build label in data_export.py ([#818](https://github.com/rapidsai/cuvs/pull/818)) [@jiangyinzuo](https://github.com/jiangyinzuo)
-- Vendor RAPIDS.cmake ([#816](https://github.com/rapidsai/cuvs/pull/816)) [@bdice](https://github.com/bdice)
-- Update libcuvs libraft ver to 25.06 in conda env ([#808](https://github.com/rapidsai/cuvs/pull/808)) [@jinsolp](https://github.com/jinsolp)
-- Moving NN Descent class and struct declarations to `nn_descent_gnnd.hpp` ([#803](https://github.com/rapidsai/cuvs/pull/803)) [@jinsolp](https://github.com/jinsolp)
-- Remove `[@rapidsai/cuvs-build-codeowners` ([#783](https://github.com/rapidsai/cuvs/pull/783)) @KyleFromNVIDIA](https://github.com/rapidsai/cuvs-build-codeowners` ([#783](https://github.com/rapidsai/cuvs/pull/783)) @KyleFromNVIDIA)
-- Moving wheel builds to specified location and uploading build artifacts to Github ([#777](https://github.com/rapidsai/cuvs/pull/777)) [@VenkateshJaya](https://github.com/VenkateshJaya)
-- Remove unused raft cagra header in add_nodes.cuh ([#741](https://github.com/rapidsai/cuvs/pull/741)) [@jiangyinzuo](https://github.com/jiangyinzuo)
-- Expose kmeans to python ([#729](https://github.com/rapidsai/cuvs/pull/729)) [@benfred](https://github.com/benfred)
-- Update cuvs to properly create a NCCL::NCCL target ([#720](https://github.com/rapidsai/cuvs/pull/720)) [@robertmaynard](https://github.com/robertmaynard)
-- Optimize euclidean distance in host refine phase ([#689](https://github.com/rapidsai/cuvs/pull/689)) [@anstellaire](https://github.com/anstellaire)
-- Moving MG functions into unified API + `raft::device_resources_snmg` as device resource type for MG functions ([#454](https://github.com/rapidsai/cuvs/pull/454)) [@viclafargue](https://github.com/viclafargue)
-- Moving random ball cover ([#218](https://github.com/rapidsai/cuvs/pull/218)) [@cjnolet](https://github.com/cjnolet)
+- Update score calculation for CAGRA-Q instance selection ([#938](https://github.com/nvidia/cuvs/pull/938)) [@enp1s0](https://github.com/enp1s0)
+- [FEA] Use Native Brute Force for Sparse Pairwise KNN ([#927](https://github.com/nvidia/cuvs/pull/927)) [@tarang-jain](https://github.com/tarang-jain)
+- use &#39;rapids-init-pip&#39; in wheel CI, other CI changes ([#917](https://github.com/nvidia/cuvs/pull/917)) [@jameslamb](https://github.com/jameslamb)
+- ANN_BENCH: Avoid repeated calls to raft::get_device_for_address in CAGRA search ([#908](https://github.com/nvidia/cuvs/pull/908)) [@achirkin](https://github.com/achirkin)
+- [Java] New off-heap Dataset support for CAGRA and Bruteforce ([#902](https://github.com/nvidia/cuvs/pull/902)) [@chatman](https://github.com/chatman)
+- Finish CUDA 12.9 migration and use branch-25.06 workflows ([#901](https://github.com/nvidia/cuvs/pull/901)) [@bdice](https://github.com/bdice)
+- Update to clang 20 ([#898](https://github.com/nvidia/cuvs/pull/898)) [@bdice](https://github.com/bdice)
+- get Java artifacts from GitHub Actions artifact store ([#893](https://github.com/nvidia/cuvs/pull/893)) [@jameslamb](https://github.com/jameslamb)
+- Quote head_rev in conda recipes ([#892](https://github.com/nvidia/cuvs/pull/892)) [@bdice](https://github.com/bdice)
+- [Java] Exposing merge API for multiple CAGRA indices ([#891](https://github.com/nvidia/cuvs/pull/891)) [@punAhuja](https://github.com/punAhuja)
+- Expose ivf-flat centers to python/c ([#888](https://github.com/nvidia/cuvs/pull/888)) [@benfred](https://github.com/benfred)
+- CUDA 12.9 use updated compression flags ([#887](https://github.com/nvidia/cuvs/pull/887)) [@robertmaynard](https://github.com/robertmaynard)
+- Expose ivf-pq centers to python/c ([#881](https://github.com/nvidia/cuvs/pull/881)) [@benfred](https://github.com/benfred)
+- Accept host inputs in python for ivf-pq build and extend ([#880](https://github.com/nvidia/cuvs/pull/880)) [@benfred](https://github.com/benfred)
+- Add tiered_index support ([#879](https://github.com/nvidia/cuvs/pull/879)) [@benfred](https://github.com/benfred)
+- Exclude librmm.so from auditwheel ([#878](https://github.com/nvidia/cuvs/pull/878)) [@bdice](https://github.com/bdice)
+- update.version.sh: remove broken reference, skip most CI on PRs that only modify update-version.sh ([#875](https://github.com/nvidia/cuvs/pull/875)) [@jameslamb](https://github.com/jameslamb)
+- Add support for Python 3.13 ([#874](https://github.com/nvidia/cuvs/pull/874)) [@gforsyth](https://github.com/gforsyth)
+- chore: lower wheel size threshold ([#872](https://github.com/nvidia/cuvs/pull/872)) [@gforsyth](https://github.com/gforsyth)
+- [Java] Adding support for prefiltering in CAGRA ([#870](https://github.com/nvidia/cuvs/pull/870)) [@punAhuja](https://github.com/punAhuja)
+- Change snmg index to use updated multi gpu resource API ([#869](https://github.com/nvidia/cuvs/pull/869)) [@jinsolp](https://github.com/jinsolp)
+- run shellcheck on all files, other small pre-commit updates ([#865](https://github.com/nvidia/cuvs/pull/865)) [@jameslamb](https://github.com/jameslamb)
+- Fix IVF PQ build metric for CAGRA ([#862](https://github.com/nvidia/cuvs/pull/862)) [@lowener](https://github.com/lowener)
+- ANN_BENCH: Expose parallel_mode parameter of FAISS CPU IVF implementation ([#861](https://github.com/nvidia/cuvs/pull/861)) [@achirkin](https://github.com/achirkin)
+- Specify matplotlib version ([#839](https://github.com/nvidia/cuvs/pull/839)) [@benfred](https://github.com/benfred)
+- Use random tmp names for index files in tests ([#837](https://github.com/nvidia/cuvs/pull/837)) [@achirkin](https://github.com/achirkin)
+- Download build artifacts from Github for CI ([#834](https://github.com/nvidia/cuvs/pull/834)) [@VenkateshJaya](https://github.com/VenkateshJaya)
+- Add NN-Descent return_distances functionality to python/C ([#833](https://github.com/nvidia/cuvs/pull/833)) [@benfred](https://github.com/benfred)
+- Optimize hnsw::from_cagra&lt;GPU&gt; ([#826](https://github.com/nvidia/cuvs/pull/826)) [@achirkin](https://github.com/achirkin)
+- Use vendored RAPIDS.cmake in example code. ([#824](https://github.com/nvidia/cuvs/pull/824)) [@bdice](https://github.com/bdice)
+- refactor(rattler): enable strict channel priority for builds ([#823](https://github.com/nvidia/cuvs/pull/823)) [@gforsyth](https://github.com/gforsyth)
+- Reduce device memory usage for CAGRA&#39;s graph optimization process (2-hop detour counting) ([#822](https://github.com/nvidia/cuvs/pull/822)) [@anaruse](https://github.com/anaruse)
+- [cuvs_bench] distinguish search label from build label in data_export.py ([#818](https://github.com/nvidia/cuvs/pull/818)) [@jiangyinzuo](https://github.com/jiangyinzuo)
+- Vendor RAPIDS.cmake ([#816](https://github.com/nvidia/cuvs/pull/816)) [@bdice](https://github.com/bdice)
+- Update libcuvs libraft ver to 25.06 in conda env ([#808](https://github.com/nvidia/cuvs/pull/808)) [@jinsolp](https://github.com/jinsolp)
+- Moving NN Descent class and struct declarations to `nn_descent_gnnd.hpp` ([#803](https://github.com/nvidia/cuvs/pull/803)) [@jinsolp](https://github.com/jinsolp)
+- Remove `@rapidsai/cuvs-build-codeowners` ([#783](https://github.com/nvidia/cuvs/pull/783)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- Moving wheel builds to specified location and uploading build artifacts to Github ([#777](https://github.com/nvidia/cuvs/pull/777)) [@VenkateshJaya](https://github.com/VenkateshJaya)
+- Remove unused raft cagra header in add_nodes.cuh ([#741](https://github.com/nvidia/cuvs/pull/741)) [@jiangyinzuo](https://github.com/jiangyinzuo)
+- Expose kmeans to python ([#729](https://github.com/nvidia/cuvs/pull/729)) [@benfred](https://github.com/benfred)
+- Update cuvs to properly create a NCCL::NCCL target ([#720](https://github.com/nvidia/cuvs/pull/720)) [@robertmaynard](https://github.com/robertmaynard)
+- Optimize euclidean distance in host refine phase ([#689](https://github.com/nvidia/cuvs/pull/689)) [@anstellaire](https://github.com/anstellaire)
+- Moving MG functions into unified API + `raft::device_resources_snmg` as device resource type for MG functions ([#454](https://github.com/nvidia/cuvs/pull/454)) [@viclafargue](https://github.com/viclafargue)
+- Moving random ball cover ([#218](https://github.com/nvidia/cuvs/pull/218)) [@cjnolet](https://github.com/cjnolet)
 
 # cuvs 25.04.00 (9 Apr 2025)
 
 ## 🚨 Breaking Changes
 
-- Use new rapids-logger library ([#644](https://github.com/rapidsai/cuvs/pull/644)) [@vyasr](https://github.com/vyasr)
+- Use new rapids-logger library ([#644](https://github.com/nvidia/cuvs/pull/644)) [@vyasr](https://github.com/vyasr)
 
 ## 🐛 Bug Fixes
 
-- Use new build_patch_only argument ([#780](https://github.com/rapidsai/cuvs/pull/780)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Fix resource import so generate_groundtruth works ([#774](https://github.com/rapidsai/cuvs/pull/774)) [@nvrohanv](https://github.com/nvrohanv)
-- Relax max duplicates in batched NN Descent ([#770](https://github.com/rapidsai/cuvs/pull/770)) [@jinsolp](https://github.com/jinsolp)
-- [BUG] Fix graph index sorting in CAGRA graph build by NN Descent ([#763](https://github.com/rapidsai/cuvs/pull/763)) [@enp1s0](https://github.com/enp1s0)
-- cuvs-bench-cpu: avoid &#39;mkl&#39; dependency on aarch64 ([#750](https://github.com/rapidsai/cuvs/pull/750)) [@jameslamb](https://github.com/jameslamb)
-- ANN_BENCH: Fix segfault in CAGRA wrapper when moving its graph ([#733](https://github.com/rapidsai/cuvs/pull/733)) [@achirkin](https://github.com/achirkin)
-- Fix duplicate indices in batch NN Descent ([#702](https://github.com/rapidsai/cuvs/pull/702)) [@jinsolp](https://github.com/jinsolp)
+- Use new build_patch_only argument ([#780](https://github.com/nvidia/cuvs/pull/780)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- Fix resource import so generate_groundtruth works ([#774](https://github.com/nvidia/cuvs/pull/774)) [@nvrohanv](https://github.com/nvrohanv)
+- Relax max duplicates in batched NN Descent ([#770](https://github.com/nvidia/cuvs/pull/770)) [@jinsolp](https://github.com/jinsolp)
+- [BUG] Fix graph index sorting in CAGRA graph build by NN Descent ([#763](https://github.com/nvidia/cuvs/pull/763)) [@enp1s0](https://github.com/enp1s0)
+- cuvs-bench-cpu: avoid &#39;mkl&#39; dependency on aarch64 ([#750](https://github.com/nvidia/cuvs/pull/750)) [@jameslamb](https://github.com/jameslamb)
+- ANN_BENCH: Fix segfault in CAGRA wrapper when moving its graph ([#733](https://github.com/nvidia/cuvs/pull/733)) [@achirkin](https://github.com/achirkin)
+- Fix duplicate indices in batch NN Descent ([#702](https://github.com/nvidia/cuvs/pull/702)) [@jinsolp](https://github.com/jinsolp)
 
 ## 📖 Documentation
 
-- Go module - Usage docs ([#779](https://github.com/rapidsai/cuvs/pull/779)) [@AyodeAwe](https://github.com/AyodeAwe)
+- Go module - Usage docs ([#779](https://github.com/nvidia/cuvs/pull/779)) [@AyodeAwe](https://github.com/AyodeAwe)
 
 ## 🚀 New Features
 
-- `L2SqrtExpanded` metric support for NN Descent ([#790](https://github.com/rapidsai/cuvs/pull/790)) [@jinsolp](https://github.com/jinsolp)
-- CAGRA search: int64_t indices in the API ([#769](https://github.com/rapidsai/cuvs/pull/769)) [@achirkin](https://github.com/achirkin)
-- Expose IVF PQ params in CAGRA C API ([#768](https://github.com/rapidsai/cuvs/pull/768)) [@divyegala](https://github.com/divyegala)
-- [Feat] Expose search with bitset C API for Brute Force ([#717](https://github.com/rapidsai/cuvs/pull/717)) [@rhdong](https://github.com/rhdong)
-- Diskann Benchmarking Wrapper ([#260](https://github.com/rapidsai/cuvs/pull/260)) [@tarang-jain](https://github.com/tarang-jain)
+- `L2SqrtExpanded` metric support for NN Descent ([#790](https://github.com/nvidia/cuvs/pull/790)) [@jinsolp](https://github.com/jinsolp)
+- CAGRA search: int64_t indices in the API ([#769](https://github.com/nvidia/cuvs/pull/769)) [@achirkin](https://github.com/achirkin)
+- Expose IVF PQ params in CAGRA C API ([#768](https://github.com/nvidia/cuvs/pull/768)) [@divyegala](https://github.com/divyegala)
+- [Feat] Expose search with bitset C API for Brute Force ([#717](https://github.com/nvidia/cuvs/pull/717)) [@rhdong](https://github.com/rhdong)
+- Diskann Benchmarking Wrapper ([#260](https://github.com/nvidia/cuvs/pull/260)) [@tarang-jain](https://github.com/tarang-jain)
 
 ## 🛠️ Improvements
 
-- Use L4 instead of V100 ([#797](https://github.com/rapidsai/cuvs/pull/797)) [@bdice](https://github.com/bdice)
-- Rust publishing updates ([#789](https://github.com/rapidsai/cuvs/pull/789)) [@AyodeAwe](https://github.com/AyodeAwe)
-- Removing print statement ([#786](https://github.com/rapidsai/cuvs/pull/786)) [@jinsolp](https://github.com/jinsolp)
-- Support passing a dataset to `FromCagra` ([#767](https://github.com/rapidsai/cuvs/pull/767)) [@ajit283](https://github.com/ajit283)
-- add function to build specific gpu-arches [REVIEW] ([#766](https://github.com/rapidsai/cuvs/pull/766)) [@nvrohanv](https://github.com/nvrohanv)
-- feat(libcuvs): port libcuvs to rattler-build ([#751](https://github.com/rapidsai/cuvs/pull/751)) [@gforsyth](https://github.com/gforsyth)
-- Consolidate Conda environment for Rust ([#745](https://github.com/rapidsai/cuvs/pull/745)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- rename &#39;sklearn&#39; conda dependency to &#39;scikit-learn&#39; ([#743](https://github.com/rapidsai/cuvs/pull/743)) [@jameslamb](https://github.com/jameslamb)
-- Reduce memory consumption of scalar quantizer ([#736](https://github.com/rapidsai/cuvs/pull/736)) [@mfoerste4](https://github.com/mfoerste4)
-- Use conda-build instead of conda-mambabuild ([#723](https://github.com/rapidsai/cuvs/pull/723)) [@bdice](https://github.com/bdice)
-- Expand NVTX annotations in CAGRA build and HNSW ([#711](https://github.com/rapidsai/cuvs/pull/711)) [@achirkin](https://github.com/achirkin)
-- Replace `cub::TransformInputIterator` with `thrust::transform_iterator` ([#707](https://github.com/rapidsai/cuvs/pull/707)) [@miscco](https://github.com/miscco)
-- Consolidate more Conda solves in CI ([#701](https://github.com/rapidsai/cuvs/pull/701)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Require CMake 3.30.4 ([#691](https://github.com/rapidsai/cuvs/pull/691)) [@robertmaynard](https://github.com/robertmaynard)
-- Create Conda CI test env in one step ([#684](https://github.com/rapidsai/cuvs/pull/684)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Use shared-workflows branch-25.04 ([#669](https://github.com/rapidsai/cuvs/pull/669)) [@bdice](https://github.com/bdice)
-- Add `shellcheck` to pre-commit and fix warnings ([#662](https://github.com/rapidsai/cuvs/pull/662)) [@gforsyth](https://github.com/gforsyth)
-- Add build_type input field for test.yaml ([#661](https://github.com/rapidsai/cuvs/pull/661)) [@gforsyth](https://github.com/gforsyth)
-- Use `rapids-pip-retry` in CI jobs that might need retries ([#648](https://github.com/rapidsai/cuvs/pull/648)) [@gforsyth](https://github.com/gforsyth)
-- Use new rapids-logger library ([#644](https://github.com/rapidsai/cuvs/pull/644)) [@vyasr](https://github.com/vyasr)
-- disallow fallback to Make in Python builds ([#636](https://github.com/rapidsai/cuvs/pull/636)) [@jameslamb](https://github.com/jameslamb)
-- Add `verify-codeowners` hook ([#633](https://github.com/rapidsai/cuvs/pull/633)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Forward-merge branch-25.02 into branch-25.04 ([#632](https://github.com/rapidsai/cuvs/pull/632)) [@bdice](https://github.com/bdice)
-- Account for RAFT sparse types updates ([#629](https://github.com/rapidsai/cuvs/pull/629)) [@divyegala](https://github.com/divyegala)
-- Migrate to NVKS for amd64 CI runners ([#627](https://github.com/rapidsai/cuvs/pull/627)) [@bdice](https://github.com/bdice)
+- Use L4 instead of V100 ([#797](https://github.com/nvidia/cuvs/pull/797)) [@bdice](https://github.com/bdice)
+- Rust publishing updates ([#789](https://github.com/nvidia/cuvs/pull/789)) [@AyodeAwe](https://github.com/AyodeAwe)
+- Removing print statement ([#786](https://github.com/nvidia/cuvs/pull/786)) [@jinsolp](https://github.com/jinsolp)
+- Support passing a dataset to `FromCagra` ([#767](https://github.com/nvidia/cuvs/pull/767)) [@ajit283](https://github.com/ajit283)
+- add function to build specific gpu-arches [REVIEW] ([#766](https://github.com/nvidia/cuvs/pull/766)) [@nvrohanv](https://github.com/nvrohanv)
+- feat(libcuvs): port libcuvs to rattler-build ([#751](https://github.com/nvidia/cuvs/pull/751)) [@gforsyth](https://github.com/gforsyth)
+- Consolidate Conda environment for Rust ([#745](https://github.com/nvidia/cuvs/pull/745)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- rename &#39;sklearn&#39; conda dependency to &#39;scikit-learn&#39; ([#743](https://github.com/nvidia/cuvs/pull/743)) [@jameslamb](https://github.com/jameslamb)
+- Reduce memory consumption of scalar quantizer ([#736](https://github.com/nvidia/cuvs/pull/736)) [@mfoerste4](https://github.com/mfoerste4)
+- Use conda-build instead of conda-mambabuild ([#723](https://github.com/nvidia/cuvs/pull/723)) [@bdice](https://github.com/bdice)
+- Expand NVTX annotations in CAGRA build and HNSW ([#711](https://github.com/nvidia/cuvs/pull/711)) [@achirkin](https://github.com/achirkin)
+- Replace `cub::TransformInputIterator` with `thrust::transform_iterator` ([#707](https://github.com/nvidia/cuvs/pull/707)) [@miscco](https://github.com/miscco)
+- Consolidate more Conda solves in CI ([#701](https://github.com/nvidia/cuvs/pull/701)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- Require CMake 3.30.4 ([#691](https://github.com/nvidia/cuvs/pull/691)) [@robertmaynard](https://github.com/robertmaynard)
+- Create Conda CI test env in one step ([#684](https://github.com/nvidia/cuvs/pull/684)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- Use shared-workflows branch-25.04 ([#669](https://github.com/nvidia/cuvs/pull/669)) [@bdice](https://github.com/bdice)
+- Add `shellcheck` to pre-commit and fix warnings ([#662](https://github.com/nvidia/cuvs/pull/662)) [@gforsyth](https://github.com/gforsyth)
+- Add build_type input field for test.yaml ([#661](https://github.com/nvidia/cuvs/pull/661)) [@gforsyth](https://github.com/gforsyth)
+- Use `rapids-pip-retry` in CI jobs that might need retries ([#648](https://github.com/nvidia/cuvs/pull/648)) [@gforsyth](https://github.com/gforsyth)
+- Use new rapids-logger library ([#644](https://github.com/nvidia/cuvs/pull/644)) [@vyasr](https://github.com/vyasr)
+- disallow fallback to Make in Python builds ([#636](https://github.com/nvidia/cuvs/pull/636)) [@jameslamb](https://github.com/jameslamb)
+- Add `verify-codeowners` hook ([#633](https://github.com/nvidia/cuvs/pull/633)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- Forward-merge branch-25.02 into branch-25.04 ([#632](https://github.com/nvidia/cuvs/pull/632)) [@bdice](https://github.com/bdice)
+- Account for RAFT sparse types updates ([#629](https://github.com/nvidia/cuvs/pull/629)) [@divyegala](https://github.com/divyegala)
+- Migrate to NVKS for amd64 CI runners ([#627](https://github.com/nvidia/cuvs/pull/627)) [@bdice](https://github.com/bdice)
 
 # cuvs 25.02.00 (13 Feb 2025)
 
 ## 🚨 Breaking Changes
 
-- update pip devcontainers to UCX 1.18, small update-version.sh fixes ([#604](https://github.com/rapidsai/cuvs/pull/604)) [@jameslamb](https://github.com/jameslamb)
-- Improve the performance of CAGRA new vector addition with the default params ([#569](https://github.com/rapidsai/cuvs/pull/569)) [@enp1s0](https://github.com/enp1s0)
-- Update for raft logger changes ([#540](https://github.com/rapidsai/cuvs/pull/540)) [@vyasr](https://github.com/vyasr)
+- update pip devcontainers to UCX 1.18, small update-version.sh fixes ([#604](https://github.com/nvidia/cuvs/pull/604)) [@jameslamb](https://github.com/jameslamb)
+- Improve the performance of CAGRA new vector addition with the default params ([#569](https://github.com/nvidia/cuvs/pull/569)) [@enp1s0](https://github.com/enp1s0)
+- Update for raft logger changes ([#540](https://github.com/nvidia/cuvs/pull/540)) [@vyasr](https://github.com/vyasr)
 
 ## 🐛 Bug Fixes
 
-- Fix ann-bench dataset blob integer overflow leading to incorrect data copy beyond 4B elems ([#671](https://github.com/rapidsai/cuvs/pull/671)) [@achirkin](https://github.com/achirkin)
-- Fix ann-bench deadlocking on HNSW destruction due to task locks ([#667](https://github.com/rapidsai/cuvs/pull/667)) [@achirkin](https://github.com/achirkin)
-- cuvs-bench Fixes ([#654](https://github.com/rapidsai/cuvs/pull/654)) [@tarang-jain](https://github.com/tarang-jain)
-- Fix std::lock_guard use for gcc 14 support ([#639](https://github.com/rapidsai/cuvs/pull/639)) [@enp1s0](https://github.com/enp1s0)
-- Fix indexing bug when using parallelism to build CPU hierarchy in HNSW ([#620](https://github.com/rapidsai/cuvs/pull/620)) [@divyegala](https://github.com/divyegala)
-- add runtime dependency on libcuvs in cuvs wheels ([#615](https://github.com/rapidsai/cuvs/pull/615)) [@jameslamb](https://github.com/jameslamb)
-- Temporarily skip CUDA 11 wheel CI ([#599](https://github.com/rapidsai/cuvs/pull/599)) [@bdice](https://github.com/bdice)
-- [Fix] l2_exp random fail in half-float32 mixed precision on self-neighboring ([#596](https://github.com/rapidsai/cuvs/pull/596)) [@rhdong](https://github.com/rhdong)
-- Add CAGRA InnerProduct test and fix a bug ([#595](https://github.com/rapidsai/cuvs/pull/595)) [@enp1s0](https://github.com/enp1s0)
-- fix cuvs_bench.run --groups options ([#592](https://github.com/rapidsai/cuvs/pull/592)) [@jiangyinzuo](https://github.com/jiangyinzuo)
-- Fix cagra_hnsw serialization when dataset is not part of index ([#591](https://github.com/rapidsai/cuvs/pull/591)) [@tfeher](https://github.com/tfeher)
-- fix create_pointset for throughput mode ([#589](https://github.com/rapidsai/cuvs/pull/589)) [@jiangyinzuo](https://github.com/jiangyinzuo)
-- Fix the use of constexpr in the dynamic batching header ([#582](https://github.com/rapidsai/cuvs/pull/582)) [@achirkin](https://github.com/achirkin)
-- Reduce the recall threshold for IVF-PQ low-precision LUT inner product  tests ([#573](https://github.com/rapidsai/cuvs/pull/573)) [@achirkin](https://github.com/achirkin)
-- Small fixes to docs and pairwise distances ([#570](https://github.com/rapidsai/cuvs/pull/570)) [@cjnolet](https://github.com/cjnolet)
-- [BUG] Fix CAGRA graph optimization bug ([#565](https://github.com/rapidsai/cuvs/pull/565)) [@enp1s0](https://github.com/enp1s0)
-- Fix broken link to python doc ([#564](https://github.com/rapidsai/cuvs/pull/564)) [@lowener](https://github.com/lowener)
-- Fix cagra::extend error message ([#532](https://github.com/rapidsai/cuvs/pull/532)) [@enp1s0](https://github.com/enp1s0)
-- Fix Grace-specific issues in CAGRA ([#527](https://github.com/rapidsai/cuvs/pull/527)) [@achirkin](https://github.com/achirkin)
+- Fix ann-bench dataset blob integer overflow leading to incorrect data copy beyond 4B elems ([#671](https://github.com/nvidia/cuvs/pull/671)) [@achirkin](https://github.com/achirkin)
+- Fix ann-bench deadlocking on HNSW destruction due to task locks ([#667](https://github.com/nvidia/cuvs/pull/667)) [@achirkin](https://github.com/achirkin)
+- cuvs-bench Fixes ([#654](https://github.com/nvidia/cuvs/pull/654)) [@tarang-jain](https://github.com/tarang-jain)
+- Fix std::lock_guard use for gcc 14 support ([#639](https://github.com/nvidia/cuvs/pull/639)) [@enp1s0](https://github.com/enp1s0)
+- Fix indexing bug when using parallelism to build CPU hierarchy in HNSW ([#620](https://github.com/nvidia/cuvs/pull/620)) [@divyegala](https://github.com/divyegala)
+- add runtime dependency on libcuvs in cuvs wheels ([#615](https://github.com/nvidia/cuvs/pull/615)) [@jameslamb](https://github.com/jameslamb)
+- Temporarily skip CUDA 11 wheel CI ([#599](https://github.com/nvidia/cuvs/pull/599)) [@bdice](https://github.com/bdice)
+- [Fix] l2_exp random fail in half-float32 mixed precision on self-neighboring ([#596](https://github.com/nvidia/cuvs/pull/596)) [@rhdong](https://github.com/rhdong)
+- Add CAGRA InnerProduct test and fix a bug ([#595](https://github.com/nvidia/cuvs/pull/595)) [@enp1s0](https://github.com/enp1s0)
+- fix cuvs_bench.run --groups options ([#592](https://github.com/nvidia/cuvs/pull/592)) [@jiangyinzuo](https://github.com/jiangyinzuo)
+- Fix cagra_hnsw serialization when dataset is not part of index ([#591](https://github.com/nvidia/cuvs/pull/591)) [@tfeher](https://github.com/tfeher)
+- fix create_pointset for throughput mode ([#589](https://github.com/nvidia/cuvs/pull/589)) [@jiangyinzuo](https://github.com/jiangyinzuo)
+- Fix the use of constexpr in the dynamic batching header ([#582](https://github.com/nvidia/cuvs/pull/582)) [@achirkin](https://github.com/achirkin)
+- Reduce the recall threshold for IVF-PQ low-precision LUT inner product tests ([#573](https://github.com/nvidia/cuvs/pull/573)) [@achirkin](https://github.com/achirkin)
+- Small fixes to docs and pairwise distances ([#570](https://github.com/nvidia/cuvs/pull/570)) [@cjnolet](https://github.com/cjnolet)
+- [BUG] Fix CAGRA graph optimization bug ([#565](https://github.com/nvidia/cuvs/pull/565)) [@enp1s0](https://github.com/enp1s0)
+- Fix broken link to python doc ([#564](https://github.com/nvidia/cuvs/pull/564)) [@lowener](https://github.com/lowener)
+- Fix cagra::extend error message ([#532](https://github.com/nvidia/cuvs/pull/532)) [@enp1s0](https://github.com/enp1s0)
+- Fix Grace-specific issues in CAGRA ([#527](https://github.com/nvidia/cuvs/pull/527)) [@achirkin](https://github.com/achirkin)
 
 ## 📖 Documentation
 
-- add docs for nn_descent ([#668](https://github.com/rapidsai/cuvs/pull/668)) [@Intron7](https://github.com/Intron7)
-- Fixing small typo in cuvs bench docs ([#586](https://github.com/rapidsai/cuvs/pull/586)) [@cjnolet](https://github.com/cjnolet)
-- Fix typos in README ([#543](https://github.com/rapidsai/cuvs/pull/543)) [@nvanbenschoten](https://github.com/nvanbenschoten)
-- Use nvidia-sphinx-theme for docs ([#528](https://github.com/rapidsai/cuvs/pull/528)) [@benfred](https://github.com/benfred)
+- add docs for nn_descent ([#668](https://github.com/nvidia/cuvs/pull/668)) [@Intron7](https://github.com/Intron7)
+- Fixing small typo in cuvs bench docs ([#586](https://github.com/nvidia/cuvs/pull/586)) [@cjnolet](https://github.com/cjnolet)
+- Fix typos in README ([#543](https://github.com/nvidia/cuvs/pull/543)) [@nvanbenschoten](https://github.com/nvanbenschoten)
+- Use nvidia-sphinx-theme for docs ([#528](https://github.com/nvidia/cuvs/pull/528)) [@benfred](https://github.com/benfred)
 
 ## 🚀 New Features
 
-- Add deep-100M to datasets.yaml for cuvs-bench ([#670](https://github.com/rapidsai/cuvs/pull/670)) [@tarang-jain](https://github.com/tarang-jain)
-- Expose configuration singleton as a global context for ann-bench algos ([#647](https://github.com/rapidsai/cuvs/pull/647)) [@achirkin](https://github.com/achirkin)
-- ANN_BENCH enhanced dataset support ([#624](https://github.com/rapidsai/cuvs/pull/624)) [@achirkin](https://github.com/achirkin)
-- [Feat] Add Support for Index `merge` in CAGRA ([#618](https://github.com/rapidsai/cuvs/pull/618)) [@rhdong](https://github.com/rhdong)
-- HNSW GPU hierarchy ([#616](https://github.com/rapidsai/cuvs/pull/616)) [@divyegala](https://github.com/divyegala)
-- CAGRA binary Hamming distance support ([#610](https://github.com/rapidsai/cuvs/pull/610)) [@enp1s0](https://github.com/enp1s0)
-- Add cuda 12.8 support ([#605](https://github.com/rapidsai/cuvs/pull/605)) [@robertmaynard](https://github.com/robertmaynard)
-- Add support for refinement with `uint32_t` index type ([#563](https://github.com/rapidsai/cuvs/pull/563)) [@lowener](https://github.com/lowener)
-- [Feat] Support `bitset` filter for Brute Force ([#560](https://github.com/rapidsai/cuvs/pull/560)) [@rhdong](https://github.com/rhdong)
-- Remove upper bounds on cuda-python to allow 12.6.2 and 11.8.5 ([#508](https://github.com/rapidsai/cuvs/pull/508)) [@bdice](https://github.com/bdice)
+- Add deep-100M to datasets.yaml for cuvs-bench ([#670](https://github.com/nvidia/cuvs/pull/670)) [@tarang-jain](https://github.com/tarang-jain)
+- Expose configuration singleton as a global context for ann-bench algos ([#647](https://github.com/nvidia/cuvs/pull/647)) [@achirkin](https://github.com/achirkin)
+- ANN_BENCH enhanced dataset support ([#624](https://github.com/nvidia/cuvs/pull/624)) [@achirkin](https://github.com/achirkin)
+- [Feat] Add Support for Index `merge` in CAGRA ([#618](https://github.com/nvidia/cuvs/pull/618)) [@rhdong](https://github.com/rhdong)
+- HNSW GPU hierarchy ([#616](https://github.com/nvidia/cuvs/pull/616)) [@divyegala](https://github.com/divyegala)
+- CAGRA binary Hamming distance support ([#610](https://github.com/nvidia/cuvs/pull/610)) [@enp1s0](https://github.com/enp1s0)
+- Add cuda 12.8 support ([#605](https://github.com/nvidia/cuvs/pull/605)) [@robertmaynard](https://github.com/robertmaynard)
+- Add support for refinement with `uint32_t` index type ([#563](https://github.com/nvidia/cuvs/pull/563)) [@lowener](https://github.com/lowener)
+- [Feat] Support `bitset` filter for Brute Force ([#560](https://github.com/nvidia/cuvs/pull/560)) [@rhdong](https://github.com/rhdong)
+- Remove upper bounds on cuda-python to allow 12.6.2 and 11.8.5 ([#508](https://github.com/nvidia/cuvs/pull/508)) [@bdice](https://github.com/bdice)
 
 ## 🛠️ Improvements
 
-- Add filtering to python for ivf_flat ([#664](https://github.com/rapidsai/cuvs/pull/664)) [@benfred](https://github.com/benfred)
-- Expose binary quantizer to C and Python ([#660](https://github.com/rapidsai/cuvs/pull/660)) [@benfred](https://github.com/benfred)
-- Add telemetry ([#652](https://github.com/rapidsai/cuvs/pull/652)) [@gforsyth](https://github.com/gforsyth)
-- Revert docs builds to CI latest tag. ([#643](https://github.com/rapidsai/cuvs/pull/643)) [@bdice](https://github.com/bdice)
-- Add float16 support in python for cagra/brute_force/ivf_pq and scalar quantizer ([#637](https://github.com/rapidsai/cuvs/pull/637)) [@benfred](https://github.com/benfred)
-- Expose NN-Descent to C and Python ([#635](https://github.com/rapidsai/cuvs/pull/635)) [@benfred](https://github.com/benfred)
-- Revert CUDA 12.8 shared workflow branch changes ([#630](https://github.com/rapidsai/cuvs/pull/630)) [@vyasr](https://github.com/vyasr)
-- cuvs-java: Rework the api to be Java 21 friendly ([#628](https://github.com/rapidsai/cuvs/pull/628)) [@ChrisHegarty](https://github.com/ChrisHegarty)
-- Build and test with CUDA 12.8.0 ([#621](https://github.com/rapidsai/cuvs/pull/621)) [@bdice](https://github.com/bdice)
-- Add Scalar Quantization to the c and python apis ([#617](https://github.com/rapidsai/cuvs/pull/617)) [@benfred](https://github.com/benfred)
-- Iteratively build graph index ([#612](https://github.com/rapidsai/cuvs/pull/612)) [@anaruse](https://github.com/anaruse)
-- update pip devcontainers to UCX 1.18, small update-version.sh fixes ([#604](https://github.com/rapidsai/cuvs/pull/604)) [@jameslamb](https://github.com/jameslamb)
-- Reduce CAGRA test runtime ([#602](https://github.com/rapidsai/cuvs/pull/602)) [@bdice](https://github.com/bdice)
-- Revert &quot;Temporarily skip CUDA 11 wheel CI&quot; ([#601](https://github.com/rapidsai/cuvs/pull/601)) [@bdice](https://github.com/bdice)
-- introduce libcuvs wheels ([#594](https://github.com/rapidsai/cuvs/pull/594)) [@jameslamb](https://github.com/jameslamb)
-- Normalize whitespace ([#593](https://github.com/rapidsai/cuvs/pull/593)) [@bdice](https://github.com/bdice)
-- Rename test to tests. ([#590](https://github.com/rapidsai/cuvs/pull/590)) [@bdice](https://github.com/bdice)
-- Use cuda.bindings layout. ([#588](https://github.com/rapidsai/cuvs/pull/588)) [@bdice](https://github.com/bdice)
-- run_cuvs_pytests.sh uses proper test dir ([#584](https://github.com/rapidsai/cuvs/pull/584)) [@robertmaynard](https://github.com/robertmaynard)
-- expose col-major bfknn to python ([#575](https://github.com/rapidsai/cuvs/pull/575)) [@benfred](https://github.com/benfred)
-- Run cuvs-bench pytests and end-to-end tests in CI ([#574](https://github.com/rapidsai/cuvs/pull/574)) [@dantegd](https://github.com/dantegd)
-- Expose col-major pairwise distances to python ([#572](https://github.com/rapidsai/cuvs/pull/572)) [@benfred](https://github.com/benfred)
-- Improve the performance of CAGRA new vector addition with the default params ([#569](https://github.com/rapidsai/cuvs/pull/569)) [@enp1s0](https://github.com/enp1s0)
-- Improve filtering documentation ([#568](https://github.com/rapidsai/cuvs/pull/568)) [@lowener](https://github.com/lowener)
-- Use GCC 13 in CUDA 12 conda builds. ([#567](https://github.com/rapidsai/cuvs/pull/567)) [@bdice](https://github.com/bdice)
-- Allow brute_force::build to work on host matrix dataset ([#562](https://github.com/rapidsai/cuvs/pull/562)) [@benfred](https://github.com/benfred)
-- FAISS with cuVS enabled in cuvs-bench ([#561](https://github.com/rapidsai/cuvs/pull/561)) [@tarang-jain](https://github.com/tarang-jain)
-- Vamana build improvement and added docs ([#558](https://github.com/rapidsai/cuvs/pull/558)) [@bkarsin](https://github.com/bkarsin)
-- Support raft&#39;s logger targets ([#557](https://github.com/rapidsai/cuvs/pull/557)) [@vyasr](https://github.com/vyasr)
-- Get Breathe from conda again ([#554](https://github.com/rapidsai/cuvs/pull/554)) [@vyasr](https://github.com/vyasr)
-- Check if nightlies have succeeded recently enough ([#548](https://github.com/rapidsai/cuvs/pull/548)) [@vyasr](https://github.com/vyasr)
-- Add support for float16 to the python pairwise distance api ([#547](https://github.com/rapidsai/cuvs/pull/547)) [@benfred](https://github.com/benfred)
-- Additional Distances for CAGRA C and Python API ([#546](https://github.com/rapidsai/cuvs/pull/546)) [@tarang-jain](https://github.com/tarang-jain)
-- remove setup.cfg files, other packaging cleanup ([#544](https://github.com/rapidsai/cuvs/pull/544)) [@jameslamb](https://github.com/jameslamb)
-- Fix CI for python cuvs_bench ([#541](https://github.com/rapidsai/cuvs/pull/541)) [@benfred](https://github.com/benfred)
-- Update for raft logger changes ([#540](https://github.com/rapidsai/cuvs/pull/540)) [@vyasr](https://github.com/vyasr)
-- Change brute_force api to match ivf*/cagra ([#536](https://github.com/rapidsai/cuvs/pull/536)) [@benfred](https://github.com/benfred)
-- Branch 25.02 merge 24.12 ([#526](https://github.com/rapidsai/cuvs/pull/526)) [@benfred](https://github.com/benfred)
-- Update cuda-python lower bounds to 12.6.2 / 11.8.5 ([#524](https://github.com/rapidsai/cuvs/pull/524)) [@bdice](https://github.com/bdice)
-- Automatic adjustment of itopk size according to filtering rate ([#509](https://github.com/rapidsai/cuvs/pull/509)) [@anaruse](https://github.com/anaruse)
-- prefer system install of UCX in devcontainers ([#501](https://github.com/rapidsai/cuvs/pull/501)) [@jameslamb](https://github.com/jameslamb)
-- Adapt to rmm logger changes ([#499](https://github.com/rapidsai/cuvs/pull/499)) [@vyasr](https://github.com/vyasr)
-- Require approval to run CI on draft PRs ([#498](https://github.com/rapidsai/cuvs/pull/498)) [@bdice](https://github.com/bdice)
-- Remove RAFT BUILD_ANN_BENCH option ([#497](https://github.com/rapidsai/cuvs/pull/497)) [@bdice](https://github.com/bdice)
-- Update example code fetching rapids-cmake to use CUVS instead of RAFT ([#493](https://github.com/rapidsai/cuvs/pull/493)) [@bdice](https://github.com/bdice)
-- Improve multi-CTA algorithm ([#492](https://github.com/rapidsai/cuvs/pull/492)) [@anaruse](https://github.com/anaruse)
-- Add filtering for CAGRA to C API ([#452](https://github.com/rapidsai/cuvs/pull/452)) [@ajit283](https://github.com/ajit283)
-- Initial cut for a cuVS Java API ([#450](https://github.com/rapidsai/cuvs/pull/450)) [@chatman](https://github.com/chatman)
-- Add breaking change workflow trigger ([#442](https://github.com/rapidsai/cuvs/pull/442)) [@AyodeAwe](https://github.com/AyodeAwe)
-- Expose `extend()` in C API ([#276](https://github.com/rapidsai/cuvs/pull/276)) [@ajit283](https://github.com/ajit283)
-- Go API - [WIP] ([#212](https://github.com/rapidsai/cuvs/pull/212)) [@ajit283](https://github.com/ajit283)
+- Add filtering to python for ivf_flat ([#664](https://github.com/nvidia/cuvs/pull/664)) [@benfred](https://github.com/benfred)
+- Expose binary quantizer to C and Python ([#660](https://github.com/nvidia/cuvs/pull/660)) [@benfred](https://github.com/benfred)
+- Add telemetry ([#652](https://github.com/nvidia/cuvs/pull/652)) [@gforsyth](https://github.com/gforsyth)
+- Revert docs builds to CI latest tag. ([#643](https://github.com/nvidia/cuvs/pull/643)) [@bdice](https://github.com/bdice)
+- Add float16 support in python for cagra/brute_force/ivf_pq and scalar quantizer ([#637](https://github.com/nvidia/cuvs/pull/637)) [@benfred](https://github.com/benfred)
+- Expose NN-Descent to C and Python ([#635](https://github.com/nvidia/cuvs/pull/635)) [@benfred](https://github.com/benfred)
+- Revert CUDA 12.8 shared workflow branch changes ([#630](https://github.com/nvidia/cuvs/pull/630)) [@vyasr](https://github.com/vyasr)
+- cuvs-java: Rework the api to be Java 21 friendly ([#628](https://github.com/nvidia/cuvs/pull/628)) [@ChrisHegarty](https://github.com/ChrisHegarty)
+- Build and test with CUDA 12.8.0 ([#621](https://github.com/nvidia/cuvs/pull/621)) [@bdice](https://github.com/bdice)
+- Add Scalar Quantization to the c and python apis ([#617](https://github.com/nvidia/cuvs/pull/617)) [@benfred](https://github.com/benfred)
+- Iteratively build graph index ([#612](https://github.com/nvidia/cuvs/pull/612)) [@anaruse](https://github.com/anaruse)
+- update pip devcontainers to UCX 1.18, small update-version.sh fixes ([#604](https://github.com/nvidia/cuvs/pull/604)) [@jameslamb](https://github.com/jameslamb)
+- Reduce CAGRA test runtime ([#602](https://github.com/nvidia/cuvs/pull/602)) [@bdice](https://github.com/bdice)
+- Revert "Temporarily skip CUDA 11 wheel CI" ([#601](https://github.com/nvidia/cuvs/pull/601)) [@bdice](https://github.com/bdice)
+- introduce libcuvs wheels ([#594](https://github.com/nvidia/cuvs/pull/594)) [@jameslamb](https://github.com/jameslamb)
+- Normalize whitespace ([#593](https://github.com/nvidia/cuvs/pull/593)) [@bdice](https://github.com/bdice)
+- Rename test to tests. ([#590](https://github.com/nvidia/cuvs/pull/590)) [@bdice](https://github.com/bdice)
+- Use cuda.bindings layout. ([#588](https://github.com/nvidia/cuvs/pull/588)) [@bdice](https://github.com/bdice)
+- run_cuvs_pytests.sh uses proper test dir ([#584](https://github.com/nvidia/cuvs/pull/584)) [@robertmaynard](https://github.com/robertmaynard)
+- expose col-major bfknn to python ([#575](https://github.com/nvidia/cuvs/pull/575)) [@benfred](https://github.com/benfred)
+- Run cuvs-bench pytests and end-to-end tests in CI ([#574](https://github.com/nvidia/cuvs/pull/574)) [@dantegd](https://github.com/dantegd)
+- Expose col-major pairwise distances to python ([#572](https://github.com/nvidia/cuvs/pull/572)) [@benfred](https://github.com/benfred)
+- Improve the performance of CAGRA new vector addition with the default params ([#569](https://github.com/nvidia/cuvs/pull/569)) [@enp1s0](https://github.com/enp1s0)
+- Improve filtering documentation ([#568](https://github.com/nvidia/cuvs/pull/568)) [@lowener](https://github.com/lowener)
+- Use GCC 13 in CUDA 12 conda builds. ([#567](https://github.com/nvidia/cuvs/pull/567)) [@bdice](https://github.com/bdice)
+- Allow brute_force::build to work on host matrix dataset ([#562](https://github.com/nvidia/cuvs/pull/562)) [@benfred](https://github.com/benfred)
+- FAISS with cuVS enabled in cuvs-bench ([#561](https://github.com/nvidia/cuvs/pull/561)) [@tarang-jain](https://github.com/tarang-jain)
+- Vamana build improvement and added docs ([#558](https://github.com/nvidia/cuvs/pull/558)) [@bkarsin](https://github.com/bkarsin)
+- Support raft's logger targets ([#557](https://github.com/nvidia/cuvs/pull/557)) [@vyasr](https://github.com/vyasr)
+- Get Breathe from conda again ([#554](https://github.com/nvidia/cuvs/pull/554)) [@vyasr](https://github.com/vyasr)
+- Check if nightlies have succeeded recently enough ([#548](https://github.com/nvidia/cuvs/pull/548)) [@vyasr](https://github.com/vyasr)
+- Add support for float16 to the python pairwise distance api ([#547](https://github.com/nvidia/cuvs/pull/547)) [@benfred](https://github.com/benfred)
+- Additional Distances for CAGRA C and Python API ([#546](https://github.com/nvidia/cuvs/pull/546)) [@tarang-jain](https://github.com/tarang-jain)
+- remove setup.cfg files, other packaging cleanup ([#544](https://github.com/nvidia/cuvs/pull/544)) [@jameslamb](https://github.com/jameslamb)
+- Fix CI for python cuvs_bench ([#541](https://github.com/nvidia/cuvs/pull/541)) [@benfred](https://github.com/benfred)
+- Update for raft logger changes ([#540](https://github.com/nvidia/cuvs/pull/540)) [@vyasr](https://github.com/vyasr)
+- Change brute_force api to match ivf*/cagra ([#536](https://github.com/nvidia/cuvs/pull/536)) [@benfred](https://github.com/benfred)
+- Branch 25.02 merge 24.12 ([#526](https://github.com/nvidia/cuvs/pull/526)) [@benfred](https://github.com/benfred)
+- Update cuda-python lower bounds to 12.6.2 / 11.8.5 ([#524](https://github.com/nvidia/cuvs/pull/524)) [@bdice](https://github.com/bdice)
+- Automatic adjustment of itopk size according to filtering rate ([#509](https://github.com/nvidia/cuvs/pull/509)) [@anaruse](https://github.com/anaruse)
+- prefer system install of UCX in devcontainers ([#501](https://github.com/nvidia/cuvs/pull/501)) [@jameslamb](https://github.com/jameslamb)
+- Adapt to rmm logger changes ([#499](https://github.com/nvidia/cuvs/pull/499)) [@vyasr](https://github.com/vyasr)
+- Require approval to run CI on draft PRs ([#498](https://github.com/nvidia/cuvs/pull/498)) [@bdice](https://github.com/bdice)
+- Remove RAFT BUILD_ANN_BENCH option ([#497](https://github.com/nvidia/cuvs/pull/497)) [@bdice](https://github.com/bdice)
+- Update example code fetching rapids-cmake to use CUVS instead of RAFT ([#493](https://github.com/nvidia/cuvs/pull/493)) [@bdice](https://github.com/bdice)
+- Improve multi-CTA algorithm ([#492](https://github.com/nvidia/cuvs/pull/492)) [@anaruse](https://github.com/anaruse)
+- Add filtering for CAGRA to C API ([#452](https://github.com/nvidia/cuvs/pull/452)) [@ajit283](https://github.com/ajit283)
+- Initial cut for a cuVS Java API ([#450](https://github.com/nvidia/cuvs/pull/450)) [@chatman](https://github.com/chatman)
+- Add breaking change workflow trigger ([#442](https://github.com/nvidia/cuvs/pull/442)) [@AyodeAwe](https://github.com/AyodeAwe)
+- Expose `extend()` in C API ([#276](https://github.com/nvidia/cuvs/pull/276)) [@ajit283](https://github.com/ajit283)
+- Go API - [WIP] ([#212](https://github.com/nvidia/cuvs/pull/212)) [@ajit283](https://github.com/ajit283)
 
 # cuvs 24.12.00 (11 Dec 2024)
 
 ## 🚨 Breaking Changes
 
-- HNSW CPU Hierarchy ([#465](https://github.com/rapidsai/cuvs/pull/465)) [@divyegala](https://github.com/divyegala)
-- Use dashes in cuvs-bench package name. ([#417](https://github.com/rapidsai/cuvs/pull/417)) [@bdice](https://github.com/bdice)
+- HNSW CPU Hierarchy ([#465](https://github.com/nvidia/cuvs/pull/465)) [@divyegala](https://github.com/divyegala)
+- Use dashes in cuvs-bench package name. ([#417](https://github.com/nvidia/cuvs/pull/417)) [@bdice](https://github.com/bdice)
 
 ## 🐛 Bug Fixes
 
-- Skip IVF-PQ packing test for lists with not enough data ([#512](https://github.com/rapidsai/cuvs/pull/512)) [@achirkin](https://github.com/achirkin)
-- [BUG] Fix CAGRA filter ([#489](https://github.com/rapidsai/cuvs/pull/489)) [@enp1s0](https://github.com/enp1s0)
-- Add `kIsSingleSource` to `PairwiseDistanceEpilogueElementwise` ([#485](https://github.com/rapidsai/cuvs/pull/485)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Fix include errors, header, and unsafe locks in iface.hpp ([#467](https://github.com/rapidsai/cuvs/pull/467)) [@achirkin](https://github.com/achirkin)
-- Fix an OOB error in device-side cuvs::neighbors::refine and CAGRA kern_prune ([#460](https://github.com/rapidsai/cuvs/pull/460)) [@achirkin](https://github.com/achirkin)
-- Put a ceiling on cuda-python ([#445](https://github.com/rapidsai/cuvs/pull/445)) [@bdice](https://github.com/bdice)
-- Enable NVTX in cuvs-cagra-search component ([#439](https://github.com/rapidsai/cuvs/pull/439)) [@achirkin](https://github.com/achirkin)
-- BUG: CAGRA multi-cta illegal access with bad queries ([#438](https://github.com/rapidsai/cuvs/pull/438)) [@achirkin](https://github.com/achirkin)
-- Fix index overflow in edge cases of CAGRA graph optimize ([#435](https://github.com/rapidsai/cuvs/pull/435)) [@achirkin](https://github.com/achirkin)
-- Fix correct call to brute force in generate groundtruth of cuvs-bench ([#427](https://github.com/rapidsai/cuvs/pull/427)) [@dantegd](https://github.com/dantegd)
-- Use Python for sccache hit rate computation. ([#420](https://github.com/rapidsai/cuvs/pull/420)) [@bdice](https://github.com/bdice)
-- Add `click` package to `cuvs-bench` conda recipe ([#408](https://github.com/rapidsai/cuvs/pull/408)) [@divyegala](https://github.com/divyegala)
-- Fix NVTX annotations ([#400](https://github.com/rapidsai/cuvs/pull/400)) [@achirkin](https://github.com/achirkin)
+- Skip IVF-PQ packing test for lists with not enough data ([#512](https://github.com/nvidia/cuvs/pull/512)) [@achirkin](https://github.com/achirkin)
+- [BUG] Fix CAGRA filter ([#489](https://github.com/nvidia/cuvs/pull/489)) [@enp1s0](https://github.com/enp1s0)
+- Add `kIsSingleSource` to `PairwiseDistanceEpilogueElementwise` ([#485](https://github.com/nvidia/cuvs/pull/485)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- Fix include errors, header, and unsafe locks in iface.hpp ([#467](https://github.com/nvidia/cuvs/pull/467)) [@achirkin](https://github.com/achirkin)
+- Fix an OOB error in device-side cuvs::neighbors::refine and CAGRA kern_prune ([#460](https://github.com/nvidia/cuvs/pull/460)) [@achirkin](https://github.com/achirkin)
+- Put a ceiling on cuda-python ([#445](https://github.com/nvidia/cuvs/pull/445)) [@bdice](https://github.com/bdice)
+- Enable NVTX in cuvs-cagra-search component ([#439](https://github.com/nvidia/cuvs/pull/439)) [@achirkin](https://github.com/achirkin)
+- BUG: CAGRA multi-cta illegal access with bad queries ([#438](https://github.com/nvidia/cuvs/pull/438)) [@achirkin](https://github.com/achirkin)
+- Fix index overflow in edge cases of CAGRA graph optimize ([#435](https://github.com/nvidia/cuvs/pull/435)) [@achirkin](https://github.com/achirkin)
+- Fix correct call to brute force in generate groundtruth of cuvs-bench ([#427](https://github.com/nvidia/cuvs/pull/427)) [@dantegd](https://github.com/dantegd)
+- Use Python for sccache hit rate computation. ([#420](https://github.com/nvidia/cuvs/pull/420)) [@bdice](https://github.com/bdice)
+- Add `click` package to `cuvs-bench` conda recipe ([#408](https://github.com/nvidia/cuvs/pull/408)) [@divyegala](https://github.com/divyegala)
+- Fix NVTX annotations ([#400](https://github.com/nvidia/cuvs/pull/400)) [@achirkin](https://github.com/achirkin)
 
 ## 📖 Documentation
 
-- [Doc] Fix CAGRA search sample code ([#484](https://github.com/rapidsai/cuvs/pull/484)) [@enp1s0](https://github.com/enp1s0)
-- Fix broken link in README.md references ([#473](https://github.com/rapidsai/cuvs/pull/473)) [@Azurethi](https://github.com/Azurethi)
-- Adding tech stack to docs ([#448](https://github.com/rapidsai/cuvs/pull/448)) [@cjnolet](https://github.com/cjnolet)
-- Fix Question Retrieval notebook ([#352](https://github.com/rapidsai/cuvs/pull/352)) [@lowener](https://github.com/lowener)
+- [Doc] Fix CAGRA search sample code ([#484](https://github.com/nvidia/cuvs/pull/484)) [@enp1s0](https://github.com/enp1s0)
+- Fix broken link in README.md references ([#473](https://github.com/nvidia/cuvs/pull/473)) [@Azurethi](https://github.com/Azurethi)
+- Adding tech stack to docs ([#448](https://github.com/nvidia/cuvs/pull/448)) [@cjnolet](https://github.com/cjnolet)
+- Fix Question Retrieval notebook ([#352](https://github.com/nvidia/cuvs/pull/352)) [@lowener](https://github.com/lowener)
 
 ## 🚀 New Features
 
-- Add C++ API scalar quantization ([#494](https://github.com/rapidsai/cuvs/pull/494)) [@mfoerste4](https://github.com/mfoerste4)
-- HNSW CPU Hierarchy ([#465](https://github.com/rapidsai/cuvs/pull/465)) [@divyegala](https://github.com/divyegala)
-- Add serialization API to brute-force ([#461](https://github.com/rapidsai/cuvs/pull/461)) [@lowener](https://github.com/lowener)
-- Add Question Retrieval notebook using Milvus ([#451](https://github.com/rapidsai/cuvs/pull/451)) [@lowener](https://github.com/lowener)
-- Migrate feature diff for NN Descent from RAFT to cuVS ([#421](https://github.com/rapidsai/cuvs/pull/421)) [@divyegala](https://github.com/divyegala)
-- Add --no-lap-sync cmd option to ann-bench ([#405](https://github.com/rapidsai/cuvs/pull/405)) [@achirkin](https://github.com/achirkin)
-- Add `InnerProduct` and `CosineExpanded` metric support in NN Descent ([#177](https://github.com/rapidsai/cuvs/pull/177)) [@divyegala](https://github.com/divyegala)
+- Add C++ API scalar quantization ([#494](https://github.com/nvidia/cuvs/pull/494)) [@mfoerste4](https://github.com/mfoerste4)
+- HNSW CPU Hierarchy ([#465](https://github.com/nvidia/cuvs/pull/465)) [@divyegala](https://github.com/divyegala)
+- Add serialization API to brute-force ([#461](https://github.com/nvidia/cuvs/pull/461)) [@lowener](https://github.com/lowener)
+- Add Question Retrieval notebook using Milvus ([#451](https://github.com/nvidia/cuvs/pull/451)) [@lowener](https://github.com/lowener)
+- Migrate feature diff for NN Descent from RAFT to cuVS ([#421](https://github.com/nvidia/cuvs/pull/421)) [@divyegala](https://github.com/divyegala)
+- Add --no-lap-sync cmd option to ann-bench ([#405](https://github.com/nvidia/cuvs/pull/405)) [@achirkin](https://github.com/achirkin)
+- Add `InnerProduct` and `CosineExpanded` metric support in NN Descent ([#177](https://github.com/nvidia/cuvs/pull/177)) [@divyegala](https://github.com/divyegala)
 
 ## 🛠️ Improvements
 
-- Update cuvs to match raft&#39;s cutlass changes ([#516](https://github.com/rapidsai/cuvs/pull/516)) [@vyasr](https://github.com/vyasr)
-- add a README for wheels ([#504](https://github.com/rapidsai/cuvs/pull/504)) [@jameslamb](https://github.com/jameslamb)
-- Move check_input_array from pylibraft ([#474](https://github.com/rapidsai/cuvs/pull/474)) [@benfred](https://github.com/benfred)
-- use different wheel-size thresholds based on CUDA version ([#469](https://github.com/rapidsai/cuvs/pull/469)) [@jameslamb](https://github.com/jameslamb)
-- Modify cuvs-bench to be able to generate ground truth in CPU systems ([#466](https://github.com/rapidsai/cuvs/pull/466)) [@dantegd](https://github.com/dantegd)
-- enforce wheel size limits, README formatting in CI ([#464](https://github.com/rapidsai/cuvs/pull/464)) [@jameslamb](https://github.com/jameslamb)
-- Moving spectral embedding and kernel gramm APIs to cuVS ([#463](https://github.com/rapidsai/cuvs/pull/463)) [@cjnolet](https://github.com/cjnolet)
-- Migrate sparse knn and distances code from raft ([#457](https://github.com/rapidsai/cuvs/pull/457)) [@benfred](https://github.com/benfred)
-- Don&#39;t presume pointers location infers usability. ([#441](https://github.com/rapidsai/cuvs/pull/441)) [@robertmaynard](https://github.com/robertmaynard)
-- call `enable_testing` in root CMakeLists.txt ([#437](https://github.com/rapidsai/cuvs/pull/437)) [@robertmaynard](https://github.com/robertmaynard)
-- CAGRA tech debt: distance descriptor and workspace memory ([#436](https://github.com/rapidsai/cuvs/pull/436)) [@achirkin](https://github.com/achirkin)
-- Add ci run_ scripts needed for build infra ([#434](https://github.com/rapidsai/cuvs/pull/434)) [@robertmaynard](https://github.com/robertmaynard)
-- Use environment variables in cache hit rate computation. ([#422](https://github.com/rapidsai/cuvs/pull/422)) [@bdice](https://github.com/bdice)
-- Use dashes in cuvs-bench package name. ([#417](https://github.com/rapidsai/cuvs/pull/417)) [@bdice](https://github.com/bdice)
-- We need to enable the c_api by default ([#416](https://github.com/rapidsai/cuvs/pull/416)) [@robertmaynard](https://github.com/robertmaynard)
-- print sccache stats in builds ([#413](https://github.com/rapidsai/cuvs/pull/413)) [@jameslamb](https://github.com/jameslamb)
-- make conda installs in CI stricter ([#406](https://github.com/rapidsai/cuvs/pull/406)) [@jameslamb](https://github.com/jameslamb)
-- Ivf c example ([#404](https://github.com/rapidsai/cuvs/pull/404)) [@abner-ma](https://github.com/abner-ma)
-- Prune workflows based on changed files ([#392](https://github.com/rapidsai/cuvs/pull/392)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- [WIP] Add pinned memory resource to C API ([#311](https://github.com/rapidsai/cuvs/pull/311)) [@ajit283](https://github.com/ajit283)
-- Dynamic Batching ([#261](https://github.com/rapidsai/cuvs/pull/261)) [@achirkin](https://github.com/achirkin)
+- Update cuvs to match raft's cutlass changes ([#516](https://github.com/nvidia/cuvs/pull/516)) [@vyasr](https://github.com/vyasr)
+- add a README for wheels ([#504](https://github.com/nvidia/cuvs/pull/504)) [@jameslamb](https://github.com/jameslamb)
+- Move check_input_array from pylibraft ([#474](https://github.com/nvidia/cuvs/pull/474)) [@benfred](https://github.com/benfred)
+- use different wheel-size thresholds based on CUDA version ([#469](https://github.com/nvidia/cuvs/pull/469)) [@jameslamb](https://github.com/jameslamb)
+- Modify cuvs-bench to be able to generate ground truth in CPU systems ([#466](https://github.com/nvidia/cuvs/pull/466)) [@dantegd](https://github.com/dantegd)
+- enforce wheel size limits, README formatting in CI ([#464](https://github.com/nvidia/cuvs/pull/464)) [@jameslamb](https://github.com/jameslamb)
+- Moving spectral embedding and kernel gramm APIs to cuVS ([#463](https://github.com/nvidia/cuvs/pull/463)) [@cjnolet](https://github.com/cjnolet)
+- Migrate sparse knn and distances code from raft ([#457](https://github.com/nvidia/cuvs/pull/457)) [@benfred](https://github.com/benfred)
+- Don't presume pointers location infers usability. ([#441](https://github.com/nvidia/cuvs/pull/441)) [@robertmaynard](https://github.com/robertmaynard)
+- call `enable_testing` in root CMakeLists.txt ([#437](https://github.com/nvidia/cuvs/pull/437)) [@robertmaynard](https://github.com/robertmaynard)
+- CAGRA tech debt: distance descriptor and workspace memory ([#436](https://github.com/nvidia/cuvs/pull/436)) [@achirkin](https://github.com/achirkin)
+- Add ci run_ scripts needed for build infra ([#434](https://github.com/nvidia/cuvs/pull/434)) [@robertmaynard](https://github.com/robertmaynard)
+- Use environment variables in cache hit rate computation. ([#422](https://github.com/nvidia/cuvs/pull/422)) [@bdice](https://github.com/bdice)
+- Use dashes in cuvs-bench package name. ([#417](https://github.com/nvidia/cuvs/pull/417)) [@bdice](https://github.com/bdice)
+- We need to enable the c_api by default ([#416](https://github.com/nvidia/cuvs/pull/416)) [@robertmaynard](https://github.com/robertmaynard)
+- print sccache stats in builds ([#413](https://github.com/nvidia/cuvs/pull/413)) [@jameslamb](https://github.com/jameslamb)
+- make conda installs in CI stricter ([#406](https://github.com/nvidia/cuvs/pull/406)) [@jameslamb](https://github.com/jameslamb)
+- Ivf c example ([#404](https://github.com/nvidia/cuvs/pull/404)) [@abner-ma](https://github.com/abner-ma)
+- Prune workflows based on changed files ([#392](https://github.com/nvidia/cuvs/pull/392)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- [WIP] Add pinned memory resource to C API ([#311](https://github.com/nvidia/cuvs/pull/311)) [@ajit283](https://github.com/ajit283)
+- Dynamic Batching ([#261](https://github.com/nvidia/cuvs/pull/261)) [@achirkin](https://github.com/achirkin)
 
 # cuvs 24.10.00 (9 Oct 2024)
 
 ## 🐛 Bug Fixes
 
-- Use 64 bit types for dataset size calculation in CAGRA graph optimizer ([#380](https://github.com/rapidsai/cuvs/pull/380)) [@tfeher](https://github.com/tfeher)
-- Remove EXPLICIT_INSTANTIATE_ONLY macros ([#358](https://github.com/rapidsai/cuvs/pull/358)) [@achirkin](https://github.com/achirkin)
-- Fix order of operations for cosine IVF Flat ([#329](https://github.com/rapidsai/cuvs/pull/329)) [@lowener](https://github.com/lowener)
-- Exclude any kernel symbol that uses cutlass ([#314](https://github.com/rapidsai/cuvs/pull/314)) [@benfred](https://github.com/benfred)
-- [Fix] pin raft dependent to rapidsai ([#299](https://github.com/rapidsai/cuvs/pull/299)) [@rhdong](https://github.com/rhdong)
-- Fix dataset dimension in IVF-PQ C wrappers ([#292](https://github.com/rapidsai/cuvs/pull/292)) [@tfeher](https://github.com/tfeher)
-- Fix python ivf-pq for int8/uint8 dtypes ([#271](https://github.com/rapidsai/cuvs/pull/271)) [@benfred](https://github.com/benfred)
-- FP16 API for CAGRA and IVF-PQ ([#264](https://github.com/rapidsai/cuvs/pull/264)) [@tfeher](https://github.com/tfeher)
+- Use 64 bit types for dataset size calculation in CAGRA graph optimizer ([#380](https://github.com/nvidia/cuvs/pull/380)) [@tfeher](https://github.com/tfeher)
+- Remove EXPLICIT_INSTANTIATE_ONLY macros ([#358](https://github.com/nvidia/cuvs/pull/358)) [@achirkin](https://github.com/achirkin)
+- Fix order of operations for cosine IVF Flat ([#329](https://github.com/nvidia/cuvs/pull/329)) [@lowener](https://github.com/lowener)
+- Exclude any kernel symbol that uses cutlass ([#314](https://github.com/nvidia/cuvs/pull/314)) [@benfred](https://github.com/benfred)
+- [Fix] pin raft dependent to rapidsai ([#299](https://github.com/nvidia/cuvs/pull/299)) [@rhdong](https://github.com/rhdong)
+- Fix dataset dimension in IVF-PQ C wrappers ([#292](https://github.com/nvidia/cuvs/pull/292)) [@tfeher](https://github.com/tfeher)
+- Fix python ivf-pq for int8/uint8 dtypes ([#271](https://github.com/nvidia/cuvs/pull/271)) [@benfred](https://github.com/benfred)
+- FP16 API for CAGRA and IVF-PQ ([#264](https://github.com/nvidia/cuvs/pull/264)) [@tfeher](https://github.com/tfeher)
 
 ## 📖 Documentation
 
-- More doc updates for 24.10 ([#396](https://github.com/rapidsai/cuvs/pull/396)) [@cjnolet](https://github.com/cjnolet)
-- fix 404 in documentation link in readme ([#395](https://github.com/rapidsai/cuvs/pull/395)) [@benfred](https://github.com/benfred)
-- Improving getting started materials ([#342](https://github.com/rapidsai/cuvs/pull/342)) [@cjnolet](https://github.com/cjnolet)
-- Fix broken examples link in README. ([#326](https://github.com/rapidsai/cuvs/pull/326)) [@bdice](https://github.com/bdice)
-- Recommend `miniforge` for conda install ([#325](https://github.com/rapidsai/cuvs/pull/325)) [@bdice](https://github.com/bdice)
+- More doc updates for 24.10 ([#396](https://github.com/nvidia/cuvs/pull/396)) [@cjnolet](https://github.com/cjnolet)
+- fix 404 in documentation link in readme ([#395](https://github.com/nvidia/cuvs/pull/395)) [@benfred](https://github.com/benfred)
+- Improving getting started materials ([#342](https://github.com/nvidia/cuvs/pull/342)) [@cjnolet](https://github.com/cjnolet)
+- Fix broken examples link in README. ([#326](https://github.com/nvidia/cuvs/pull/326)) [@bdice](https://github.com/bdice)
+- Recommend `miniforge` for conda install ([#325](https://github.com/nvidia/cuvs/pull/325)) [@bdice](https://github.com/bdice)
 
 ## 🚀 New Features
 
-- Port remaining scripts to `cuvs_bench` ([#368](https://github.com/rapidsai/cuvs/pull/368)) [@divyegala](https://github.com/divyegala)
-- [Feat] Relative change with `bitset` API feature #2439 in raft ([#350](https://github.com/rapidsai/cuvs/pull/350)) [@rhdong](https://github.com/rhdong)
-- cuvs_bench plotting functions ([#347](https://github.com/rapidsai/cuvs/pull/347)) [@dantegd](https://github.com/dantegd)
-- CosineExpanded Metric for IVF-PQ (normalize inputs) ([#346](https://github.com/rapidsai/cuvs/pull/346)) [@tarang-jain](https://github.com/tarang-jain)
-- Python API for CAGRA+HNSW ([#246](https://github.com/rapidsai/cuvs/pull/246)) [@divyegala](https://github.com/divyegala)
-- C API for CAGRA+HNSW ([#240](https://github.com/rapidsai/cuvs/pull/240)) [@divyegala](https://github.com/divyegala)
-- SNMG ANN ([#231](https://github.com/rapidsai/cuvs/pull/231)) [@viclafargue](https://github.com/viclafargue)
-- [FEA] Support for half-float mixed precise in brute-force ([#225](https://github.com/rapidsai/cuvs/pull/225)) [@rhdong](https://github.com/rhdong)
+- Port remaining scripts to `cuvs_bench` ([#368](https://github.com/nvidia/cuvs/pull/368)) [@divyegala](https://github.com/divyegala)
+- [Feat] Relative change with `bitset` API feature #2439 in raft ([#350](https://github.com/nvidia/cuvs/pull/350)) [@rhdong](https://github.com/rhdong)
+- cuvs_bench plotting functions ([#347](https://github.com/nvidia/cuvs/pull/347)) [@dantegd](https://github.com/dantegd)
+- CosineExpanded Metric for IVF-PQ (normalize inputs) ([#346](https://github.com/nvidia/cuvs/pull/346)) [@tarang-jain](https://github.com/tarang-jain)
+- Python API for CAGRA+HNSW ([#246](https://github.com/nvidia/cuvs/pull/246)) [@divyegala](https://github.com/divyegala)
+- C API for CAGRA+HNSW ([#240](https://github.com/nvidia/cuvs/pull/240)) [@divyegala](https://github.com/divyegala)
+- SNMG ANN ([#231](https://github.com/nvidia/cuvs/pull/231)) [@viclafargue](https://github.com/viclafargue)
+- [FEA] Support for half-float mixed precise in brute-force ([#225](https://github.com/nvidia/cuvs/pull/225)) [@rhdong](https://github.com/rhdong)
 
 ## 🛠️ Improvements
 
-- Remove cuvs-cagra-search from cuvs_static link ([#388](https://github.com/rapidsai/cuvs/pull/388)) [@benfred](https://github.com/benfred)
-- Add a static library for cuvs ([#382](https://github.com/rapidsai/cuvs/pull/382)) [@benfred](https://github.com/benfred)
-- Put the ann-bench large_workspace_resource in managed memory ([#372](https://github.com/rapidsai/cuvs/pull/372)) [@achirkin](https://github.com/achirkin)
-- Add multigpu kmeans fit function ([#348](https://github.com/rapidsai/cuvs/pull/348)) [@benfred](https://github.com/benfred)
-- Update update-version.sh to use packaging lib ([#344](https://github.com/rapidsai/cuvs/pull/344)) [@AyodeAwe](https://github.com/AyodeAwe)
-- remove NCCL pins in build and test environments ([#341](https://github.com/rapidsai/cuvs/pull/341)) [@jameslamb](https://github.com/jameslamb)
-- Vamana/DiskANN index build ([#339](https://github.com/rapidsai/cuvs/pull/339)) [@bkarsin](https://github.com/bkarsin)
-- Use CI workflow branch &#39;branch-24.10&#39; again ([#331](https://github.com/rapidsai/cuvs/pull/331)) [@jameslamb](https://github.com/jameslamb)
-- fix style checks on Python 3.12 ([#328](https://github.com/rapidsai/cuvs/pull/328)) [@jameslamb](https://github.com/jameslamb)
-- Update flake8 to 7.1.1. ([#327](https://github.com/rapidsai/cuvs/pull/327)) [@bdice](https://github.com/bdice)
-- Add function for calculating the mutual_reachability_graph ([#323](https://github.com/rapidsai/cuvs/pull/323)) [@benfred](https://github.com/benfred)
-- Simplify libcuvs conda recipe. ([#322](https://github.com/rapidsai/cuvs/pull/322)) [@bdice](https://github.com/bdice)
-- Refactor dependencies.yaml to use depends-on pattern. ([#321](https://github.com/rapidsai/cuvs/pull/321)) [@bdice](https://github.com/bdice)
-- Update Python versions in cuvs_bench pyproject.toml. ([#318](https://github.com/rapidsai/cuvs/pull/318)) [@bdice](https://github.com/bdice)
-- Brute force knn tile size heuristic ([#316](https://github.com/rapidsai/cuvs/pull/316)) [@mfoerste4](https://github.com/mfoerste4)
-- Euclidean distance example ([#315](https://github.com/rapidsai/cuvs/pull/315)) [@abner-ma](https://github.com/abner-ma)
-- Migrate trustworthiness and silhouette_score stats from RAFT ([#313](https://github.com/rapidsai/cuvs/pull/313)) [@benfred](https://github.com/benfred)
-- Add support for Python 3.12 ([#312](https://github.com/rapidsai/cuvs/pull/312)) [@jameslamb](https://github.com/jameslamb)
-- Add `managed` option for RMM Pool memory resource to C API ([#305](https://github.com/rapidsai/cuvs/pull/305)) [@ajit283](https://github.com/ajit283)
-- Update rapidsai/pre-commit-hooks ([#303](https://github.com/rapidsai/cuvs/pull/303)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Expose search function with pre-filter for ANN ([#302](https://github.com/rapidsai/cuvs/pull/302)) [@lowener](https://github.com/lowener)
-- Drop Python 3.9 support ([#301](https://github.com/rapidsai/cuvs/pull/301)) [@jameslamb](https://github.com/jameslamb)
-- Use CUDA math wheels ([#298](https://github.com/rapidsai/cuvs/pull/298)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Remove NumPy &lt;2 pin ([#297](https://github.com/rapidsai/cuvs/pull/297)) [@seberg](https://github.com/seberg)
-- CAGRA - separable compilation for distance computation ([#296](https://github.com/rapidsai/cuvs/pull/296)) [@achirkin](https://github.com/achirkin)
-- Updating example notebooks ([#294](https://github.com/rapidsai/cuvs/pull/294)) [@cjnolet](https://github.com/cjnolet)
-- Add RMM Pool memory resource to C API ([#285](https://github.com/rapidsai/cuvs/pull/285)) [@ajit283](https://github.com/ajit283)
-- Update pre-commit hooks ([#283](https://github.com/rapidsai/cuvs/pull/283)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Improve update-version.sh ([#282](https://github.com/rapidsai/cuvs/pull/282)) [@bdice](https://github.com/bdice)
-- Use tool.scikit-build.cmake.version, set scikit-build-core minimum-version ([#280](https://github.com/rapidsai/cuvs/pull/280)) [@jameslamb](https://github.com/jameslamb)
-- Add cuvs_bench.run python code and build ([#279](https://github.com/rapidsai/cuvs/pull/279)) [@dantegd](https://github.com/dantegd)
-- Add cuvs-bench to dependencies and conda environments ([#275](https://github.com/rapidsai/cuvs/pull/275)) [@dantegd](https://github.com/dantegd)
-- Update pip devcontainers to UCX v1.17.0 ([#262](https://github.com/rapidsai/cuvs/pull/262)) [@jameslamb](https://github.com/jameslamb)
-- Adding example for tuning build and search params using Optuna ([#257](https://github.com/rapidsai/cuvs/pull/257)) [@dpadmanabhan03](https://github.com/dpadmanabhan03)
-- Fixed link to build docs and corrected ivf_flat_example ([#255](https://github.com/rapidsai/cuvs/pull/255)) [@mmccarty](https://github.com/mmccarty)
-- Merge branch-24.08 into branch-24.10 ([#254](https://github.com/rapidsai/cuvs/pull/254)) [@jameslamb](https://github.com/jameslamb)
-- Persistent CAGRA kernel ([#215](https://github.com/rapidsai/cuvs/pull/215)) [@achirkin](https://github.com/achirkin)
-- [FEA] Support for Cosine distance in IVF-Flat ([#179](https://github.com/rapidsai/cuvs/pull/179)) [@lowener](https://github.com/lowener)
+- Remove cuvs-cagra-search from cuvs_static link ([#388](https://github.com/nvidia/cuvs/pull/388)) [@benfred](https://github.com/benfred)
+- Add a static library for cuvs ([#382](https://github.com/nvidia/cuvs/pull/382)) [@benfred](https://github.com/benfred)
+- Put the ann-bench large_workspace_resource in managed memory ([#372](https://github.com/nvidia/cuvs/pull/372)) [@achirkin](https://github.com/achirkin)
+- Add multigpu kmeans fit function ([#348](https://github.com/nvidia/cuvs/pull/348)) [@benfred](https://github.com/benfred)
+- Update update-version.sh to use packaging lib ([#344](https://github.com/nvidia/cuvs/pull/344)) [@AyodeAwe](https://github.com/AyodeAwe)
+- remove NCCL pins in build and test environments ([#341](https://github.com/nvidia/cuvs/pull/341)) [@jameslamb](https://github.com/jameslamb)
+- Vamana/DiskANN index build ([#339](https://github.com/nvidia/cuvs/pull/339)) [@bkarsin](https://github.com/bkarsin)
+- Use CI workflow branch &#39;branch-24.10&#39; again ([#331](https://github.com/nvidia/cuvs/pull/331)) [@jameslamb](https://github.com/jameslamb)
+- fix style checks on Python 3.12 ([#328](https://github.com/nvidia/cuvs/pull/328)) [@jameslamb](https://github.com/jameslamb)
+- Update flake8 to 7.1.1. ([#327](https://github.com/nvidia/cuvs/pull/327)) [@bdice](https://github.com/bdice)
+- Add function for calculating the mutual_reachability_graph ([#323](https://github.com/nvidia/cuvs/pull/323)) [@benfred](https://github.com/benfred)
+- Simplify libcuvs conda recipe. ([#322](https://github.com/nvidia/cuvs/pull/322)) [@bdice](https://github.com/bdice)
+- Refactor dependencies.yaml to use depends-on pattern. ([#321](https://github.com/nvidia/cuvs/pull/321)) [@bdice](https://github.com/bdice)
+- Update Python versions in cuvs_bench pyproject.toml. ([#318](https://github.com/nvidia/cuvs/pull/318)) [@bdice](https://github.com/bdice)
+- Brute force knn tile size heuristic ([#316](https://github.com/nvidia/cuvs/pull/316)) [@mfoerste4](https://github.com/mfoerste4)
+- Euclidean distance example ([#315](https://github.com/nvidia/cuvs/pull/315)) [@abner-ma](https://github.com/abner-ma)
+- Migrate trustworthiness and silhouette_score stats from RAFT ([#313](https://github.com/nvidia/cuvs/pull/313)) [@benfred](https://github.com/benfred)
+- Add support for Python 3.12 ([#312](https://github.com/nvidia/cuvs/pull/312)) [@jameslamb](https://github.com/jameslamb)
+- Add `managed` option for RMM Pool memory resource to C API ([#305](https://github.com/nvidia/cuvs/pull/305)) [@ajit283](https://github.com/ajit283)
+- Update rapidsai/pre-commit-hooks ([#303](https://github.com/nvidia/cuvs/pull/303)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- Expose search function with pre-filter for ANN ([#302](https://github.com/nvidia/cuvs/pull/302)) [@lowener](https://github.com/lowener)
+- Drop Python 3.9 support ([#301](https://github.com/nvidia/cuvs/pull/301)) [@jameslamb](https://github.com/jameslamb)
+- Use CUDA math wheels ([#298](https://github.com/nvidia/cuvs/pull/298)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- Remove NumPy &lt;2 pin ([#297](https://github.com/nvidia/cuvs/pull/297)) [@seberg](https://github.com/seberg)
+- CAGRA - separable compilation for distance computation ([#296](https://github.com/nvidia/cuvs/pull/296)) [@achirkin](https://github.com/achirkin)
+- Updating example notebooks ([#294](https://github.com/nvidia/cuvs/pull/294)) [@cjnolet](https://github.com/cjnolet)
+- Add RMM Pool memory resource to C API ([#285](https://github.com/nvidia/cuvs/pull/285)) [@ajit283](https://github.com/ajit283)
+- Update pre-commit hooks ([#283](https://github.com/nvidia/cuvs/pull/283)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- Improve update-version.sh ([#282](https://github.com/nvidia/cuvs/pull/282)) [@bdice](https://github.com/bdice)
+- Use tool.scikit-build.cmake.version, set scikit-build-core minimum-version ([#280](https://github.com/nvidia/cuvs/pull/280)) [@jameslamb](https://github.com/jameslamb)
+- Add cuvs_bench.run python code and build ([#279](https://github.com/nvidia/cuvs/pull/279)) [@dantegd](https://github.com/dantegd)
+- Add cuvs-bench to dependencies and conda environments ([#275](https://github.com/nvidia/cuvs/pull/275)) [@dantegd](https://github.com/dantegd)
+- Update pip devcontainers to UCX v1.17.0 ([#262](https://github.com/nvidia/cuvs/pull/262)) [@jameslamb](https://github.com/jameslamb)
+- Adding example for tuning build and search params using Optuna ([#257](https://github.com/nvidia/cuvs/pull/257)) [@dpadmanabhan03](https://github.com/dpadmanabhan03)
+- Fixed link to build docs and corrected ivf_flat_example ([#255](https://github.com/nvidia/cuvs/pull/255)) [@mmccarty](https://github.com/mmccarty)
+- Merge branch-24.08 into branch-24.10 ([#254](https://github.com/nvidia/cuvs/pull/254)) [@jameslamb](https://github.com/jameslamb)
+- Persistent CAGRA kernel ([#215](https://github.com/nvidia/cuvs/pull/215)) [@achirkin](https://github.com/achirkin)
+- [FEA] Support for Cosine distance in IVF-Flat ([#179](https://github.com/nvidia/cuvs/pull/179)) [@lowener](https://github.com/lowener)
 
 # cuvs 24.08.00 (7 Aug 2024)
 
 ## 🚨 Breaking Changes
 
-- Allow serialization on streams ([#173](https://github.com/rapidsai/cuvs/pull/173)) [@benfred](https://github.com/benfred)
+- Allow serialization on streams ([#173](https://github.com/nvidia/cuvs/pull/173)) [@benfred](https://github.com/benfred)
 
 ## 🐛 Bug Fixes
 
-- Remove fp16 kernels that have no public entry point ([#268](https://github.com/rapidsai/cuvs/pull/268)) [@tfeher](https://github.com/tfeher)
-- Use `raft::util::popc(...)` public API ([#249](https://github.com/rapidsai/cuvs/pull/249)) [@divyegala](https://github.com/divyegala)
-- Enable building FAISS main statically ([#241](https://github.com/rapidsai/cuvs/pull/241)) [@tarang-jain](https://github.com/tarang-jain)
-- CAGRA bench: use device-side refinement when the data is on device ([#228](https://github.com/rapidsai/cuvs/pull/228)) [@achirkin](https://github.com/achirkin)
-- Rename `.devcontainer`s for CUDA 12.5 ([#224](https://github.com/rapidsai/cuvs/pull/224)) [@jakirkham](https://github.com/jakirkham)
-- Fix a CAGRA graph opt bug ([#192](https://github.com/rapidsai/cuvs/pull/192)) [@enp1s0](https://github.com/enp1s0)
+- Remove fp16 kernels that have no public entry point ([#268](https://github.com/nvidia/cuvs/pull/268)) [@tfeher](https://github.com/tfeher)
+- Use `raft::util::popc(...)` public API ([#249](https://github.com/nvidia/cuvs/pull/249)) [@divyegala](https://github.com/divyegala)
+- Enable building FAISS main statically ([#241](https://github.com/nvidia/cuvs/pull/241)) [@tarang-jain](https://github.com/tarang-jain)
+- CAGRA bench: use device-side refinement when the data is on device ([#228](https://github.com/nvidia/cuvs/pull/228)) [@achirkin](https://github.com/achirkin)
+- Rename `.devcontainer`s for CUDA 12.5 ([#224](https://github.com/nvidia/cuvs/pull/224)) [@jakirkham](https://github.com/jakirkham)
+- Fix a CAGRA graph opt bug ([#192](https://github.com/nvidia/cuvs/pull/192)) [@enp1s0](https://github.com/enp1s0)
 
 ## 📖 Documentation
 
-- fix library name in docs (&#39;cuvs&#39; not &#39;pycuvs&#39;) ([#193](https://github.com/rapidsai/cuvs/pull/193)) [@jameslamb](https://github.com/jameslamb)
+- fix library name in docs (&#39;cuvs&#39; not &#39;pycuvs&#39;) ([#193](https://github.com/nvidia/cuvs/pull/193)) [@jameslamb](https://github.com/jameslamb)
 
 ## 🚀 New Features
 
-- Add cuvs_bench python folder, config files and constraints ([#244](https://github.com/rapidsai/cuvs/pull/244)) [@dantegd](https://github.com/dantegd)
-- Add MST optimization to guarantee the connectivity of CAGRA graphs ([#237](https://github.com/rapidsai/cuvs/pull/237)) [@anaruse](https://github.com/anaruse)
-- Moving over C++ API of CAGRA+hnswlib from RAFT ([#229](https://github.com/rapidsai/cuvs/pull/229)) [@divyegala](https://github.com/divyegala)
-- [FEA] expose python &amp; C API for prefiltered brute force ([#174](https://github.com/rapidsai/cuvs/pull/174)) [@rhdong](https://github.com/rhdong)
-- CAGRA new vector addition ([#151](https://github.com/rapidsai/cuvs/pull/151)) [@enp1s0](https://github.com/enp1s0)
+- Add cuvs_bench python folder, config files and constraints ([#244](https://github.com/nvidia/cuvs/pull/244)) [@dantegd](https://github.com/dantegd)
+- Add MST optimization to guarantee the connectivity of CAGRA graphs ([#237](https://github.com/nvidia/cuvs/pull/237)) [@anaruse](https://github.com/anaruse)
+- Moving over C++ API of CAGRA+hnswlib from RAFT ([#229](https://github.com/nvidia/cuvs/pull/229)) [@divyegala](https://github.com/divyegala)
+- [FEA] expose python &amp; C API for prefiltered brute force ([#174](https://github.com/nvidia/cuvs/pull/174)) [@rhdong](https://github.com/rhdong)
+- CAGRA new vector addition ([#151](https://github.com/nvidia/cuvs/pull/151)) [@enp1s0](https://github.com/enp1s0)
 
 ## 🛠️ Improvements
 
-- [Opt] introduce the `masked_matmul` to prefiltered brute force. ([#251](https://github.com/rapidsai/cuvs/pull/251)) [@rhdong](https://github.com/rhdong)
-- Add more info to ANN_BENCH context ([#248](https://github.com/rapidsai/cuvs/pull/248)) [@achirkin](https://github.com/achirkin)
-- split up CUDA-suffixed dependencies in dependencies.yaml ([#247](https://github.com/rapidsai/cuvs/pull/247)) [@jameslamb](https://github.com/jameslamb)
-- Fix pinning to a different RAFT tag ([#235](https://github.com/rapidsai/cuvs/pull/235)) [@benfred](https://github.com/benfred)
-- Use workflow branch 24.08 again ([#234](https://github.com/rapidsai/cuvs/pull/234)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- chore: update search_plan.cuh ([#232](https://github.com/rapidsai/cuvs/pull/232)) [@eltociear](https://github.com/eltociear)
-- Enable kernel &amp; memcpy overlapping in IVF index building ([#230](https://github.com/rapidsai/cuvs/pull/230)) [@abc99lr](https://github.com/abc99lr)
-- CAGRA: reduce argument count in select_and_run() kernel wrappers ([#227](https://github.com/rapidsai/cuvs/pull/227)) [@achirkin](https://github.com/achirkin)
-- Mark the rust brute force unittest as flaky ([#226](https://github.com/rapidsai/cuvs/pull/226)) [@benfred](https://github.com/benfred)
-- Add python bindings for ivf-* extend functions ([#220](https://github.com/rapidsai/cuvs/pull/220)) [@benfred](https://github.com/benfred)
-- Build and test with CUDA 12.5.1 ([#219](https://github.com/rapidsai/cuvs/pull/219)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Add col-major support for brute force knn ([#217](https://github.com/rapidsai/cuvs/pull/217)) [@benfred](https://github.com/benfred)
-- Add CUDA_STATIC_MATH_LIBRARIES ([#216](https://github.com/rapidsai/cuvs/pull/216)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- skip CMake 3.30.0 ([#214](https://github.com/rapidsai/cuvs/pull/214)) [@jameslamb](https://github.com/jameslamb)
-- Complete Migration of IVF Helpers / Features from RAFT ([#213](https://github.com/rapidsai/cuvs/pull/213)) [@tarang-jain](https://github.com/tarang-jain)
-- Use verify-alpha-spec hook ([#209](https://github.com/rapidsai/cuvs/pull/209)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Fixes for publishing rust package to crates.io ([#207](https://github.com/rapidsai/cuvs/pull/207)) [@benfred](https://github.com/benfred)
-- Add rust example ([#206](https://github.com/rapidsai/cuvs/pull/206)) [@benfred](https://github.com/benfred)
-- Adding IVF examples ([#203](https://github.com/rapidsai/cuvs/pull/203)) [@cjnolet](https://github.com/cjnolet)
-- Fix compilation error when _CLK_BREAKDOWN is defined in cagra. ([#202](https://github.com/rapidsai/cuvs/pull/202)) [@jiangyinzuo](https://github.com/jiangyinzuo)
-- DOC: update notebook link ([#191](https://github.com/rapidsai/cuvs/pull/191)) [@raybellwaves](https://github.com/raybellwaves)
-- Change cagra.build_index to cagra.build ([#187](https://github.com/rapidsai/cuvs/pull/187)) [@benfred](https://github.com/benfred)
-- Add python serialization API&#39;s for ivf-pq and ivf_flat ([#186](https://github.com/rapidsai/cuvs/pull/186)) [@benfred](https://github.com/benfred)
-- resolve dependency-file-generator warning, rapids-build-backend followup ([#185](https://github.com/rapidsai/cuvs/pull/185)) [@jameslamb](https://github.com/jameslamb)
-- Adopt CI/packaging codeowners ([#183](https://github.com/rapidsai/cuvs/pull/183)) [@raydouglass](https://github.com/raydouglass)
-- Scaling workspace resources ([#181](https://github.com/rapidsai/cuvs/pull/181)) [@achirkin](https://github.com/achirkin)
-- Remove text builds of documentation ([#180](https://github.com/rapidsai/cuvs/pull/180)) [@vyasr](https://github.com/vyasr)
-- Add refine to the Python and C api&#39;s ([#175](https://github.com/rapidsai/cuvs/pull/175)) [@benfred](https://github.com/benfred)
-- Allow serialization on streams ([#173](https://github.com/rapidsai/cuvs/pull/173)) [@benfred](https://github.com/benfred)
-- Forward-merge branch-24.06 into branch-24.08 ([#169](https://github.com/rapidsai/cuvs/pull/169)) [@benfred](https://github.com/benfred)
-- Use rapids-build-backend ([#145](https://github.com/rapidsai/cuvs/pull/145)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- ANN_BENCH ([#130](https://github.com/rapidsai/cuvs/pull/130)) [@achirkin](https://github.com/achirkin)
-- Enable random subsampling ([#122](https://github.com/rapidsai/cuvs/pull/122)) [@tfeher](https://github.com/tfeher)
+- [Opt] introduce the `masked_matmul` to prefiltered brute force. ([#251](https://github.com/nvidia/cuvs/pull/251)) [@rhdong](https://github.com/rhdong)
+- Add more info to ANN_BENCH context ([#248](https://github.com/nvidia/cuvs/pull/248)) [@achirkin](https://github.com/achirkin)
+- split up CUDA-suffixed dependencies in dependencies.yaml ([#247](https://github.com/nvidia/cuvs/pull/247)) [@jameslamb](https://github.com/jameslamb)
+- Fix pinning to a different RAFT tag ([#235](https://github.com/nvidia/cuvs/pull/235)) [@benfred](https://github.com/benfred)
+- Use workflow branch 24.08 again ([#234](https://github.com/nvidia/cuvs/pull/234)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- chore: update search_plan.cuh ([#232](https://github.com/nvidia/cuvs/pull/232)) [@eltociear](https://github.com/eltociear)
+- Enable kernel &amp; memcpy overlapping in IVF index building ([#230](https://github.com/nvidia/cuvs/pull/230)) [@abc99lr](https://github.com/abc99lr)
+- CAGRA: reduce argument count in select_and_run() kernel wrappers ([#227](https://github.com/nvidia/cuvs/pull/227)) [@achirkin](https://github.com/achirkin)
+- Mark the rust brute force unittest as flaky ([#226](https://github.com/nvidia/cuvs/pull/226)) [@benfred](https://github.com/benfred)
+- Add python bindings for ivf-* extend functions ([#220](https://github.com/nvidia/cuvs/pull/220)) [@benfred](https://github.com/benfred)
+- Build and test with CUDA 12.5.1 ([#219](https://github.com/nvidia/cuvs/pull/219)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- Add col-major support for brute force knn ([#217](https://github.com/nvidia/cuvs/pull/217)) [@benfred](https://github.com/benfred)
+- Add CUDA_STATIC_MATH_LIBRARIES ([#216](https://github.com/nvidia/cuvs/pull/216)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- skip CMake 3.30.0 ([#214](https://github.com/nvidia/cuvs/pull/214)) [@jameslamb](https://github.com/jameslamb)
+- Complete Migration of IVF Helpers / Features from RAFT ([#213](https://github.com/nvidia/cuvs/pull/213)) [@tarang-jain](https://github.com/tarang-jain)
+- Use verify-alpha-spec hook ([#209](https://github.com/nvidia/cuvs/pull/209)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- Fixes for publishing rust package to crates.io ([#207](https://github.com/nvidia/cuvs/pull/207)) [@benfred](https://github.com/benfred)
+- Add rust example ([#206](https://github.com/nvidia/cuvs/pull/206)) [@benfred](https://github.com/benfred)
+- Adding IVF examples ([#203](https://github.com/nvidia/cuvs/pull/203)) [@cjnolet](https://github.com/cjnolet)
+- Fix compilation error when _CLK_BREAKDOWN is defined in cagra. ([#202](https://github.com/nvidia/cuvs/pull/202)) [@jiangyinzuo](https://github.com/jiangyinzuo)
+- DOC: update notebook link ([#191](https://github.com/nvidia/cuvs/pull/191)) [@raybellwaves](https://github.com/raybellwaves)
+- Change cagra.build_index to cagra.build ([#187](https://github.com/nvidia/cuvs/pull/187)) [@benfred](https://github.com/benfred)
+- Add python serialization API&#39;s for ivf-pq and ivf_flat ([#186](https://github.com/nvidia/cuvs/pull/186)) [@benfred](https://github.com/benfred)
+- resolve dependency-file-generator warning, rapids-build-backend followup ([#185](https://github.com/nvidia/cuvs/pull/185)) [@jameslamb](https://github.com/jameslamb)
+- Adopt CI/packaging codeowners ([#183](https://github.com/nvidia/cuvs/pull/183)) [@raydouglass](https://github.com/raydouglass)
+- Scaling workspace resources ([#181](https://github.com/nvidia/cuvs/pull/181)) [@achirkin](https://github.com/achirkin)
+- Remove text builds of documentation ([#180](https://github.com/nvidia/cuvs/pull/180)) [@vyasr](https://github.com/vyasr)
+- Add refine to the Python and C api&#39;s ([#175](https://github.com/nvidia/cuvs/pull/175)) [@benfred](https://github.com/benfred)
+- Allow serialization on streams ([#173](https://github.com/nvidia/cuvs/pull/173)) [@benfred](https://github.com/benfred)
+- Forward-merge branch-24.06 into branch-24.08 ([#169](https://github.com/nvidia/cuvs/pull/169)) [@benfred](https://github.com/benfred)
+- Use rapids-build-backend ([#145](https://github.com/nvidia/cuvs/pull/145)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- ANN_BENCH ([#130](https://github.com/nvidia/cuvs/pull/130)) [@achirkin](https://github.com/achirkin)
+- Enable random subsampling ([#122](https://github.com/nvidia/cuvs/pull/122)) [@tfeher](https://github.com/tfeher)
 
 # cuvs 24.06.00 (5 Jun 2024)
 
 ## 🐛 Bug Fixes
 
-- Fix CAGRA OOM handling ([#167](https://github.com/rapidsai/cuvs/pull/167)) [@tfeher](https://github.com/tfeher)
-- Pass through raft static CMake var ([#111](https://github.com/rapidsai/cuvs/pull/111)) [@vyasr](https://github.com/vyasr)
-- Fix rust docs build ([#84](https://github.com/rapidsai/cuvs/pull/84)) [@benfred](https://github.com/benfred)
+- Fix CAGRA OOM handling ([#167](https://github.com/nvidia/cuvs/pull/167)) [@tfeher](https://github.com/tfeher)
+- Pass through raft static CMake var ([#111](https://github.com/nvidia/cuvs/pull/111)) [@vyasr](https://github.com/vyasr)
+- Fix rust docs build ([#84](https://github.com/nvidia/cuvs/pull/84)) [@benfred](https://github.com/benfred)
 
 ## 📖 Documentation
 
-- chore: update Doxyfile ([#162](https://github.com/rapidsai/cuvs/pull/162)) [@eltociear](https://github.com/eltociear)
-- cuVS docs updates for release ([#161](https://github.com/rapidsai/cuvs/pull/161)) [@cjnolet](https://github.com/cjnolet)
-- update: fix RAFT URL in README ([#91](https://github.com/rapidsai/cuvs/pull/91)) [@hurutoriya](https://github.com/hurutoriya)
-- Update the developer&#39;s guide with new copyright hook ([#81](https://github.com/rapidsai/cuvs/pull/81)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Add `lucene-cuvs` to integrations section of docs ([#73](https://github.com/rapidsai/cuvs/pull/73)) [@cjnolet](https://github.com/cjnolet)
+- chore: update Doxyfile ([#162](https://github.com/nvidia/cuvs/pull/162)) [@eltociear](https://github.com/eltociear)
+- cuVS docs updates for release ([#161](https://github.com/nvidia/cuvs/pull/161)) [@cjnolet](https://github.com/cjnolet)
+- update: fix RAFT URL in README ([#91](https://github.com/nvidia/cuvs/pull/91)) [@hurutoriya](https://github.com/hurutoriya)
+- Update the developer&#39;s guide with new copyright hook ([#81](https://github.com/nvidia/cuvs/pull/81)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- Add `lucene-cuvs` to integrations section of docs ([#73](https://github.com/nvidia/cuvs/pull/73)) [@cjnolet](https://github.com/cjnolet)
 
 ## 🚀 New Features
 
-- Add `refine` to public API ([#154](https://github.com/rapidsai/cuvs/pull/154)) [@lowener](https://github.com/lowener)
-- [FEA] support of prefiltered brute force ([#146](https://github.com/rapidsai/cuvs/pull/146)) [@rhdong](https://github.com/rhdong)
-- Migrate IVF-Flat from RAFT ([#94](https://github.com/rapidsai/cuvs/pull/94)) [@divyegala](https://github.com/divyegala)
-- Migrate IVF-PQ from RAFT to cuVS ([#86](https://github.com/rapidsai/cuvs/pull/86)) [@lowener](https://github.com/lowener)
+- Add `refine` to public API ([#154](https://github.com/nvidia/cuvs/pull/154)) [@lowener](https://github.com/lowener)
+- [FEA] support of prefiltered brute force ([#146](https://github.com/nvidia/cuvs/pull/146)) [@rhdong](https://github.com/rhdong)
+- Migrate IVF-Flat from RAFT ([#94](https://github.com/nvidia/cuvs/pull/94)) [@divyegala](https://github.com/divyegala)
+- Migrate IVF-PQ from RAFT to cuVS ([#86](https://github.com/nvidia/cuvs/pull/86)) [@lowener](https://github.com/lowener)
 
 ## 🛠️ Improvements
 
-- Expose serialization to the python / c-api ([#164](https://github.com/rapidsai/cuvs/pull/164)) [@benfred](https://github.com/benfred)
-- Select k instantiations ([#159](https://github.com/rapidsai/cuvs/pull/159)) [@benfred](https://github.com/benfred)
-- fix devcontainer name for codespaces ([#153](https://github.com/rapidsai/cuvs/pull/153)) [@trxcllnt](https://github.com/trxcllnt)
-- Accept host_mdspan for IVF-PQ build and extend ([#148](https://github.com/rapidsai/cuvs/pull/148)) [@tfeher](https://github.com/tfeher)
-- Add pairwise_distance api&#39;s for C, Python and Rust ([#142](https://github.com/rapidsai/cuvs/pull/142)) [@benfred](https://github.com/benfred)
-- Changing RAFT_EXPLICT_* to CUVS_EXPLITI_* ([#141](https://github.com/rapidsai/cuvs/pull/141)) [@cjnolet](https://github.com/cjnolet)
-- Speed-up rust build ([#138](https://github.com/rapidsai/cuvs/pull/138)) [@benfred](https://github.com/benfred)
-- Removing `libraft.so` from libcuvs dependencies ([#132](https://github.com/rapidsai/cuvs/pull/132)) [@cjnolet](https://github.com/cjnolet)
-- CAGRA API update and allow async host refinement ([#131](https://github.com/rapidsai/cuvs/pull/131)) [@mfoerste4](https://github.com/mfoerste4)
-- Fix rust api docs ([#119](https://github.com/rapidsai/cuvs/pull/119)) [@benfred](https://github.com/benfred)
-- Migrate BFKNN from raft ([#118](https://github.com/rapidsai/cuvs/pull/118)) [@benfred](https://github.com/benfred)
-- Fix IVF-PQ helper functions ([#116](https://github.com/rapidsai/cuvs/pull/116)) [@lowener](https://github.com/lowener)
-- Migrate `raft::cluster` to `cuvs::cluster` ([#115](https://github.com/rapidsai/cuvs/pull/115)) [@cjnolet](https://github.com/cjnolet)
-- hide RAFT #pragma deprecation warnings ([#114](https://github.com/rapidsai/cuvs/pull/114)) [@trxcllnt](https://github.com/trxcllnt)
-- Enable Warnings as errors in Python tests ([#102](https://github.com/rapidsai/cuvs/pull/102)) [@mroeschke](https://github.com/mroeschke)
-- Remove libnvjitlink dependency. ([#97](https://github.com/rapidsai/cuvs/pull/97)) [@bdice](https://github.com/bdice)
-- Migrate to `{{ stdlib(&quot;c&quot;) }}` ([#93](https://github.com/rapidsai/cuvs/pull/93)) [@hcho3](https://github.com/hcho3)
-- update: replace to cuvs from RAFT in PULL_REQUEST_TEMPLATE ([#92](https://github.com/rapidsai/cuvs/pull/92)) [@hurutoriya](https://github.com/hurutoriya)
-- Add python and rust bindings for Ivf-Pq ([#90](https://github.com/rapidsai/cuvs/pull/90)) [@benfred](https://github.com/benfred)
-- add --rm and --name to devcontainer run args ([#89](https://github.com/rapidsai/cuvs/pull/89)) [@trxcllnt](https://github.com/trxcllnt)
-- Update pip devcontainers to UCX v1.15.0 ([#88](https://github.com/rapidsai/cuvs/pull/88)) [@trxcllnt](https://github.com/trxcllnt)
-- Remove gtest from dependencies.yaml ([#87](https://github.com/rapidsai/cuvs/pull/87)) [@robertmaynard](https://github.com/robertmaynard)
-- Moving and renaming distance namespaces from raft -&gt; cuvs ([#85](https://github.com/rapidsai/cuvs/pull/85)) [@cjnolet](https://github.com/cjnolet)
-- Use static gtest ([#83](https://github.com/rapidsai/cuvs/pull/83)) [@robertmaynard](https://github.com/robertmaynard)
-- Add python and rust bindings for Ivf-Flat ([#82](https://github.com/rapidsai/cuvs/pull/82)) [@benfred](https://github.com/benfred)
-- Forward merge branch-24.04 to branch-24.06 ([#80](https://github.com/rapidsai/cuvs/pull/80)) [@benfred](https://github.com/benfred)
-- Update devcontainers to use cuda12.2 ([#72](https://github.com/rapidsai/cuvs/pull/72)) [@benfred](https://github.com/benfred)
-- Forward merge branch-24.04 to branch-24.06 ([#71](https://github.com/rapidsai/cuvs/pull/71)) [@benfred](https://github.com/benfred)
-- Enable forward-merger ops-bot plugin ([#70](https://github.com/rapidsai/cuvs/pull/70)) [@benfred](https://github.com/benfred)
-- Adds missing files to `update-version.sh` ([#69](https://github.com/rapidsai/cuvs/pull/69)) [@AyodeAwe](https://github.com/AyodeAwe)
-- Add Cagra-Q compression to the python and rust api&#39;s ([#68](https://github.com/rapidsai/cuvs/pull/68)) [@benfred](https://github.com/benfred)
-- ConfigureCUDA.cmake now sets CUVS_ prefixed variables ([#66](https://github.com/rapidsai/cuvs/pull/66)) [@robertmaynard](https://github.com/robertmaynard)
-- Enable all tests for `arm` jobs ([#63](https://github.com/rapidsai/cuvs/pull/63)) [@galipremsagar](https://github.com/galipremsagar)
+- Expose serialization to the python / c-api ([#164](https://github.com/nvidia/cuvs/pull/164)) [@benfred](https://github.com/benfred)
+- Select k instantiations ([#159](https://github.com/nvidia/cuvs/pull/159)) [@benfred](https://github.com/benfred)
+- fix devcontainer name for codespaces ([#153](https://github.com/nvidia/cuvs/pull/153)) [@trxcllnt](https://github.com/trxcllnt)
+- Accept host_mdspan for IVF-PQ build and extend ([#148](https://github.com/nvidia/cuvs/pull/148)) [@tfeher](https://github.com/tfeher)
+- Add pairwise_distance api&#39;s for C, Python and Rust ([#142](https://github.com/nvidia/cuvs/pull/142)) [@benfred](https://github.com/benfred)
+- Changing RAFT_EXPLICT_* to CUVS_EXPLITI_* ([#141](https://github.com/nvidia/cuvs/pull/141)) [@cjnolet](https://github.com/cjnolet)
+- Speed-up rust build ([#138](https://github.com/nvidia/cuvs/pull/138)) [@benfred](https://github.com/benfred)
+- Removing `libraft.so` from libcuvs dependencies ([#132](https://github.com/nvidia/cuvs/pull/132)) [@cjnolet](https://github.com/cjnolet)
+- CAGRA API update and allow async host refinement ([#131](https://github.com/nvidia/cuvs/pull/131)) [@mfoerste4](https://github.com/mfoerste4)
+- Fix rust api docs ([#119](https://github.com/nvidia/cuvs/pull/119)) [@benfred](https://github.com/benfred)
+- Migrate BFKNN from raft ([#118](https://github.com/nvidia/cuvs/pull/118)) [@benfred](https://github.com/benfred)
+- Fix IVF-PQ helper functions ([#116](https://github.com/nvidia/cuvs/pull/116)) [@lowener](https://github.com/lowener)
+- Migrate `raft::cluster` to `cuvs::cluster` ([#115](https://github.com/nvidia/cuvs/pull/115)) [@cjnolet](https://github.com/cjnolet)
+- hide RAFT #pragma deprecation warnings ([#114](https://github.com/nvidia/cuvs/pull/114)) [@trxcllnt](https://github.com/trxcllnt)
+- Enable Warnings as errors in Python tests ([#102](https://github.com/nvidia/cuvs/pull/102)) [@mroeschke](https://github.com/mroeschke)
+- Remove libnvjitlink dependency. ([#97](https://github.com/nvidia/cuvs/pull/97)) [@bdice](https://github.com/bdice)
+- Migrate to `{{ stdlib(&quot;c&quot;) }}` ([#93](https://github.com/nvidia/cuvs/pull/93)) [@hcho3](https://github.com/hcho3)
+- update: replace to cuvs from RAFT in PULL_REQUEST_TEMPLATE ([#92](https://github.com/nvidia/cuvs/pull/92)) [@hurutoriya](https://github.com/hurutoriya)
+- Add python and rust bindings for Ivf-Pq ([#90](https://github.com/nvidia/cuvs/pull/90)) [@benfred](https://github.com/benfred)
+- add --rm and --name to devcontainer run args ([#89](https://github.com/nvidia/cuvs/pull/89)) [@trxcllnt](https://github.com/trxcllnt)
+- Update pip devcontainers to UCX v1.15.0 ([#88](https://github.com/nvidia/cuvs/pull/88)) [@trxcllnt](https://github.com/trxcllnt)
+- Remove gtest from dependencies.yaml ([#87](https://github.com/nvidia/cuvs/pull/87)) [@robertmaynard](https://github.com/robertmaynard)
+- Moving and renaming distance namespaces from raft -&gt; cuvs ([#85](https://github.com/nvidia/cuvs/pull/85)) [@cjnolet](https://github.com/cjnolet)
+- Use static gtest ([#83](https://github.com/nvidia/cuvs/pull/83)) [@robertmaynard](https://github.com/robertmaynard)
+- Add python and rust bindings for Ivf-Flat ([#82](https://github.com/nvidia/cuvs/pull/82)) [@benfred](https://github.com/benfred)
+- Forward merge branch-24.04 to branch-24.06 ([#80](https://github.com/nvidia/cuvs/pull/80)) [@benfred](https://github.com/benfred)
+- Update devcontainers to use cuda12.2 ([#72](https://github.com/nvidia/cuvs/pull/72)) [@benfred](https://github.com/benfred)
+- Forward merge branch-24.04 to branch-24.06 ([#71](https://github.com/nvidia/cuvs/pull/71)) [@benfred](https://github.com/benfred)
+- Enable forward-merger ops-bot plugin ([#70](https://github.com/nvidia/cuvs/pull/70)) [@benfred](https://github.com/benfred)
+- Adds missing files to `update-version.sh` ([#69](https://github.com/nvidia/cuvs/pull/69)) [@AyodeAwe](https://github.com/AyodeAwe)
+- Add Cagra-Q compression to the python and rust api&#39;s ([#68](https://github.com/nvidia/cuvs/pull/68)) [@benfred](https://github.com/benfred)
+- ConfigureCUDA.cmake now sets CUVS_ prefixed variables ([#66](https://github.com/nvidia/cuvs/pull/66)) [@robertmaynard](https://github.com/robertmaynard)
+- Enable all tests for `arm` jobs ([#63](https://github.com/nvidia/cuvs/pull/63)) [@galipremsagar](https://github.com/galipremsagar)
diff --git a/Dockerfile b/Dockerfile
index 81a7f4f0c5..82f92468d7 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -34,7 +34,7 @@ RUN echo "  Building cuVS Docker image with:" && \
 # Container metadata
 LABEL maintainer="RAPIDS cuVS Team"
 LABEL description="RAPIDS cuVS - Vector Search and Clustering on GPU"
-LABEL org.opencontainers.image.source="https://github.com/rapidsai/cuvs"
+LABEL org.opencontainers.image.source="https://github.com/nvidia/cuvs"
 LABEL org.opencontainers.image.usage="docker run --gpus all -it <image>"
 
 # Environment setup
diff --git a/README.md b/README.md
index b4025bc835..5a38522280 100755
--- a/README.md
+++ b/README.md
@@ -15,11 +15,11 @@
 - [Documentation](https://docs.nvidia.com/cuvs): Library documentation.
 - [Build and Install Guide](https://docs.nvidia.com/cuvs/installation): Instructions for installing and building cuVS.
 - [Getting Started Guide](https://docs.nvidia.com/cuvs/getting-started): Guide to getting started with cuVS.
-- [Code Examples](https://github.com/rapidsai/cuvs/tree/HEAD/examples): Self-contained Code Examples.
+- [Code Examples](https://github.com/nvidia/cuvs/tree/HEAD/examples): Self-contained Code Examples.
 - [API Reference Documentation](https://docs.nvidia.com/cuvs/api_reference): API Documentation.
 - [RAPIDS Community](https://rapids.ai/community.html): Get help, contribute, and collaborate.
-- [GitHub repository](https://github.com/rapidsai/cuvs): Download the cuVS source code.
-- [Issue tracker](https://github.com/rapidsai/cuvs/issues): Report issues or request features.
+- [GitHub repository](https://github.com/nvidia/cuvs): Download the cuVS source code.
+- [Issue tracker](https://github.com/nvidia/cuvs/issues): Report issues or request features.
 
 ## What is cuVS?
 
@@ -114,7 +114,7 @@ cagra::index_params index_params;
 auto index = cagra::build(res, index_params, dataset);
 ```
 
-For more code examples of the C++ APIs, including drop-in Cmake project templates, please refer to the [C++ examples](https://github.com/rapidsai/cuvs/tree/HEAD/examples) directory in the codebase.
+For more code examples of the C++ APIs, including drop-in CMake project templates, please refer to the [C++ examples](https://github.com/nvidia/cuvs/tree/HEAD/examples) directory in the codebase.
 
 ### C API
 
@@ -139,7 +139,7 @@ cuvsCagraIndexParamsDestroy(index_params);
 cuvsResourcesDestroy(res);
 ```
 
-For more code examples of the C APIs, including drop-in Cmake project templates, please refer to the [C examples](https://github.com/rapidsai/cuvs/tree/main/examples/c)
+For more code examples of the C APIs, including drop-in CMake project templates, please refer to the [C examples](https://github.com/nvidia/cuvs/tree/main/examples/c).
 
 ### Rust API
 
@@ -202,7 +202,7 @@ fn cagra_example() -> Result<()> {
 }
 ```
 
-For more code examples of the Rust APIs, including a drop-in project templates, please refer to the [Rust examples](https://github.com/rapidsai/cuvs/tree/main/examples/rust).
+For more code examples of the Rust APIs, including drop-in project templates, please refer to the [Rust examples](https://github.com/nvidia/cuvs/tree/main/examples/rust).
 
 ## Contributing
 
diff --git a/ci/build_java.sh b/ci/build_java.sh
index 2e363bb452..0519243375 100755
--- a/ci/build_java.sh
+++ b/ci/build_java.sh
@@ -12,7 +12,7 @@ export SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE=true
 
 # TODO: Remove this argument-handling when build and test workflows are separated,
 #       and test_java.sh no longer calls build_java.sh
-#       ref: https://github.com/rapidsai/cuvs/issues/868
+#       ref: https://github.com/nvidia/cuvs/issues/868
 EXTRA_BUILD_ARGS=()
 if [[ "${1:-}" == "--run-java-tests" ]]; then
   EXTRA_BUILD_ARGS+=("--run-java-tests")
diff --git a/ci/check_c_abi/pyproject.toml b/ci/check_c_abi/pyproject.toml
index 23c94cbeed..02d1c9a6b5 100644
--- a/ci/check_c_abi/pyproject.toml
+++ b/ci/check_c_abi/pyproject.toml
@@ -16,7 +16,7 @@ dependencies = ["libclang", "msgspec", "termcolor"]
 check-c-abi = "check_c_abi.main:main_cli"
 
 [project.urls]
-Homepage = "https://github.com/rapidsai/cuvs"
+Homepage = "https://github.com/nvidia/cuvs"
 
 [tool.setuptools.dynamic]
 version = { file = "VERSION" }
diff --git a/ci/test_java.sh b/ci/test_java.sh
index 4d7f07bc47..7cdd9493c7 100755
--- a/ci/test_java.sh
+++ b/ci/test_java.sh
@@ -17,7 +17,7 @@ RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}"
 export RAPIDS_CUDA_MAJOR
 
 # TODO: switch to installing pre-built artifacts instead of rebuilding in test jobs
-#       ref: https://github.com/rapidsai/cuvs/issues/868
+#       ref: https://github.com/nvidia/cuvs/issues/868
 ci/build_java.sh --run-java-tests
 
 rapids-logger "Test script exiting with value: $EXITCODE"
diff --git a/cpp/bench/ann/src/diskann/diskann_wrapper.h b/cpp/bench/ann/src/diskann/diskann_wrapper.h
index b99c3c6354..3cd92e5e63 100644
--- a/cpp/bench/ann/src/diskann/diskann_wrapper.h
+++ b/cpp/bench/ann/src/diskann/diskann_wrapper.h
@@ -35,7 +35,7 @@ diskann::Metric parse_metric_to_diskann(cuvs::bench::Metric metric)
   }
 }
 
-// TODO (tarangj): Remaining features are tracked at https://github.com/rapidsai/cuvs/issues/656
+// TODO (tarangj): Remaining features are tracked at https://github.com/nvidia/cuvs/issues/656
 template <typename T>
 class diskann_memory : public algo<T> {
  public:
diff --git a/cpp/include/cuvs/cluster/kmeans.hpp b/cpp/include/cuvs/cluster/kmeans.hpp
index e2b4ea4a36..bdc76eae9f 100644
--- a/cpp/include/cuvs/cluster/kmeans.hpp
+++ b/cpp/include/cuvs/cluster/kmeans.hpp
@@ -181,7 +181,7 @@ enum class kmeans_type { KMeans = 0, KMeansBalanced = 1 };
  * @brief Find clusters with k-means algorithm using batched processing of host data.
  *
  * TODO: Evaluate replacing the extent type with int64_t. Reference issue:
- * https://github.com/rapidsai/cuvs/issues/1961
+ * https://github.com/nvidia/cuvs/issues/1961
  *
  * This overload supports out-of-core computation where the dataset resides
  * on the host. Data is processed in GPU-sized batches, streaming from host to device.
diff --git a/cpp/include/cuvs/cluster/spectral.hpp b/cpp/include/cuvs/cluster/spectral.hpp
index deea697743..5ef0616141 100644
--- a/cpp/include/cuvs/cluster/spectral.hpp
+++ b/cpp/include/cuvs/cluster/spectral.hpp
@@ -49,7 +49,7 @@ struct params {
  * @{
  */
 
-// TODO: int64_t nnz support (see https://github.com/rapidsai/cuvs/issues/1484)
+// TODO: int64_t nnz support (see https://github.com/nvidia/cuvs/issues/1484)
 
 /**
  * @brief Perform spectral clustering on a connectivity graph
diff --git a/cpp/include/cuvs/neighbors/common.hpp b/cpp/include/cuvs/neighbors/common.hpp
index 2fd804f115..54247b044c 100644
--- a/cpp/include/cuvs/neighbors/common.hpp
+++ b/cpp/include/cuvs/neighbors/common.hpp
@@ -758,7 +758,7 @@ constexpr static IdxT kInvalidRecord =
  * @tparam IdxT The index type for source indices
  * @tparam SizeT The size type
  *
- * TODO: Make this struct internal (tracking issue: https://github.com/rapidsai/cuvs/issues/1726)
+ * TODO: Make this struct internal (tracking issue: https://github.com/nvidia/cuvs/issues/1726)
  */
 template <typename ValueT, typename IdxT, typename SizeT = uint32_t>
 struct list_base {
diff --git a/cpp/include/cuvs/neighbors/hnsw.hpp b/cpp/include/cuvs/neighbors/hnsw.hpp
index fb726fed71..ed1c43356f 100644
--- a/cpp/include/cuvs/neighbors/hnsw.hpp
+++ b/cpp/include/cuvs/neighbors/hnsw.hpp
@@ -750,7 +750,7 @@ struct search_params : cuvs::neighbors::search_params {
  * @}
  */
 
-// TODO: Filtered Search APIs: https://github.com/rapidsai/cuvs/issues/363
+// TODO: Filtered Search APIs: https://github.com/nvidia/cuvs/issues/363
 
 /**
  * @defgroup hnsw_cpp_index_search Search hnswlib index
diff --git a/cpp/src/neighbors/all_neighbors/all_neighbors_merge.cuh b/cpp/src/neighbors/all_neighbors/all_neighbors_merge.cuh
index f9088a2730..f528045d80 100644
--- a/cpp/src/neighbors/all_neighbors/all_neighbors_merge.cuh
+++ b/cpp/src/neighbors/all_neighbors/all_neighbors_merge.cuh
@@ -116,7 +116,7 @@ RAFT_KERNEL merge_subgraphs_kernel(IdxT* cluster_data_indices,
         // to each other after sorting by distances. Thus, for now we sweep a neighboring window of
         // size 4 or sweep the entire row to check for duplicates, and keep the first occurrence
         // only.
-        // related issue: https://github.com/rapidsai/cuvs/issues/1056
+        // related issue: https://github.com/nvidia/cuvs/issues/1056
         // uniqueMask[colId] = static_cast<int16_t>(blockValues[colId] != blockValues[colId - 1]);
 
         int is_unique = 1;
diff --git a/cpp/src/neighbors/detail/cagra/cagra_build.cuh b/cpp/src/neighbors/detail/cagra/cagra_build.cuh
index a7c15b4161..0a82f2cd5b 100644
--- a/cpp/src/neighbors/detail/cagra/cagra_build.cuh
+++ b/cpp/src/neighbors/detail/cagra/cagra_build.cuh
@@ -94,7 +94,7 @@ void ace_get_partition_labels(
 
   // Sampling vectors from dataset. Uses float conversion on host instead of
   // raft::matrix::sample_rows to minimize GPU memory usage.
-  // TODO(julianmi): Switch to sample_rows when https://github.com/rapidsai/cuvs/issues/1461 is
+  // TODO(julianmi): Switch to sample_rows when https://github.com/nvidia/cuvs/issues/1461 is
   // addressed.
   size_t n_samples         = dataset_size * sampling_rate;
   const size_t min_samples = 100 * n_partitions;
diff --git a/cpp/src/neighbors/detail/nn_descent.cuh b/cpp/src/neighbors/detail/nn_descent.cuh
index f7b80fbf7f..21da188481 100644
--- a/cpp/src/neighbors/detail/nn_descent.cuh
+++ b/cpp/src/neighbors/detail/nn_descent.cuh
@@ -539,7 +539,7 @@ __device__ __forceinline__ void calculate_metric(float* s_distances,
         s_distances[i] = 0.0;
         int n1         = row_neighbors[row_id];
         int n2         = col_neighbors[col_id];
-        // TODO: https://github.com/rapidsai/cuvs/issues/1127
+        // TODO: https://github.com/nvidia/cuvs/issues/1127
         const uint8_t* data_n1 = reinterpret_cast<const uint8_t*>(data) + n1 * data_dim;
         const uint8_t* data_n2 = reinterpret_cast<const uint8_t*>(data) + n2 * data_dim;
         for (int d = 0; d < data_dim; d++) {
@@ -550,7 +550,7 @@ __device__ __forceinline__ void calculate_metric(float* s_distances,
         s_distances[i] =
           l2_norms[row_neighbors[row_id]] + l2_norms[col_neighbors[col_id]] - 2.0 * s_distances[i];
         // for fp32 vs fp16 precision differences resulting in negative distances when distance
-        // should be 0 related issue: https://github.com/rapidsai/cuvs/issues/991
+        // should be 0 related issue: https://github.com/nvidia/cuvs/issues/991
         s_distances[i] = s_distances[i] < 0.0f ? 0.0f : s_distances[i];
         if (!can_postprocess_dist && metric == cuvs::distance::DistanceType::L2SqrtExpanded) {
           s_distances[i] = sqrtf(s_distances[i]);
diff --git a/cpp/src/neighbors/detail/vamana/vamana_build.cuh b/cpp/src/neighbors/detail/vamana/vamana_build.cuh
index 336d81215b..5a04ebeba2 100644
--- a/cpp/src/neighbors/detail/vamana/vamana_build.cuh
+++ b/cpp/src/neighbors/detail/vamana/vamana_build.cuh
@@ -216,7 +216,7 @@ void batched_insert_vamana(
 
   // Random medoid has minor impact on recall
   // TODO: use heuristic for better medoid selection, issue:
-  // https://github.com/rapidsai/cuvs/issues/355
+  // https://github.com/nvidia/cuvs/issues/355
   *medoid_id = rand() % N;
 
   // size of current batch of inserts, increases logarithmically until max_batchsize
diff --git a/cpp/src/neighbors/detail/vpq_dataset.cuh b/cpp/src/neighbors/detail/vpq_dataset.cuh
index ec4a684274..0a5a30f934 100644
--- a/cpp/src/neighbors/detail/vpq_dataset.cuh
+++ b/cpp/src/neighbors/detail/vpq_dataset.cuh
@@ -401,7 +401,7 @@ __launch_bounds__(BlockSize) RAFT_KERNEL process_and_fill_codes_kernel(
 /**
  * Note: `inline_vq_labels` should only be used for CAGRA-Q compatibility or internal use-cases.
  * Otherwise, vq_labels should be preferred.
- * Issue: https://github.com/rapidsai/cuvs/issues/1722
+ * Issue: https://github.com/nvidia/cuvs/issues/1722
  */
 template <typename MathT, typename IdxT, typename DatasetT>
 void process_and_fill_codes(
diff --git a/cpp/src/neighbors/mg/snmg.cuh b/cpp/src/neighbors/mg/snmg.cuh
index 43e4aa4471..a4f45e906a 100644
--- a/cpp/src/neighbors/mg/snmg.cuh
+++ b/cpp/src/neighbors/mg/snmg.cuh
@@ -607,7 +607,7 @@ void search(const raft::resources& clique,
       cuvs::core::omp::check_threads(index.num_ranks_);
 // Each rank gets its own thread; that thread handles all batches for that rank sequentially.
 // This prevents concurrent access to the same GPU from multiple threads. (see
-// https://github.com/rapidsai/cuvs/issues/1720)
+// https://github.com/nvidia/cuvs/issues/1720)
 #pragma omp parallel for num_threads(index.num_ranks_)
       for (int rank = 0; rank < index.num_ranks_; rank++) {
         for (int64_t batch_idx = rank; batch_idx < n_batches; batch_idx += index.num_ranks_) {
diff --git a/cpp/tests/neighbors/ann_cagra/bug_issue_93_reproducer.cu b/cpp/tests/neighbors/ann_cagra/bug_issue_93_reproducer.cu
index 6b4b037167..d7d492cc74 100644
--- a/cpp/tests/neighbors/ann_cagra/bug_issue_93_reproducer.cu
+++ b/cpp/tests/neighbors/ann_cagra/bug_issue_93_reproducer.cu
@@ -2,7 +2,7 @@
  * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  *
- * Reproducer for https://github.com/rapidsai/cuvs-lucene/issues/93
+ * Reproducer for https://github.com/nvidia/cuvs-lucene/issues/93
  *   cuvsCagraSearch returned 0 (Reason=cudaErrorInvalidValue:invalid argument)
  *
  * ROOT CAUSE:
diff --git a/cpp/tests/neighbors/ann_cagra/bug_iterative_cagra_build.cu b/cpp/tests/neighbors/ann_cagra/bug_iterative_cagra_build.cu
index aaee5a77e5..00af4987d3 100644
--- a/cpp/tests/neighbors/ann_cagra/bug_iterative_cagra_build.cu
+++ b/cpp/tests/neighbors/ann_cagra/bug_iterative_cagra_build.cu
@@ -27,7 +27,7 @@ class CagraIterativeBuildBugTest : public ::testing::Test {
     // Set up iterative CAGRA graph building
     cagra::index_params index_params;
     // The bug manifests when graph_degree is equal to intermediate_graph_degree
-    // see issue https://github.com/rapidsai/cuvs/issues/1818
+    // see issue https://github.com/nvidia/cuvs/issues/1818
     index_params.graph_degree              = 16;
     index_params.intermediate_graph_degree = 16;
 
diff --git a/cpp/tests/neighbors/ann_ivf_flat.cuh b/cpp/tests/neighbors/ann_ivf_flat.cuh
index ffc1d03bf6..88b6d168d8 100644
--- a/cpp/tests/neighbors/ann_ivf_flat.cuh
+++ b/cpp/tests/neighbors/ann_ivf_flat.cuh
@@ -545,7 +545,7 @@ const std::vector<AnnIvfFlatInputs<int64_t>> inputs = {
   {1000, 10000, 2050, 16, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false},
   {1000, 10000, 2050, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false},
   // TODO: Re-enable test after adjusting parameters for higher recall. See
-  // https://github.com/rapidsai/cuvs/issues/1091
+  // https://github.com/nvidia/cuvs/issues/1091
   // {1000, 10000, 2051, 16, 40, 1024, cuvs::distance::DistanceType::InnerProduct, true},
   {1000, 10000, 2051, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, true},
   {1000, 10000, 2052, 16, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false},
diff --git a/cpp/tests/neighbors/ann_ivf_pq.cuh b/cpp/tests/neighbors/ann_ivf_pq.cuh
index 033f0af9c2..3ca727c24a 100644
--- a/cpp/tests/neighbors/ann_ivf_pq.cuh
+++ b/cpp/tests/neighbors/ann_ivf_pq.cuh
@@ -1109,7 +1109,7 @@ inline auto enum_variety_cosine() -> test_cases_t
     if (y.min_recall.has_value()) {
       if (y.search_params.lut_dtype == CUDA_R_8U) {
         // TODO: Increase this recall threshold for 8 bit lut
-        // (https://github.com/rapidsai/cuvs/issues/390)
+        // (https://github.com/nvidia/cuvs/issues/390)
         y.min_recall = y.min_recall.value() * 0.70;
       } else {
         // In other cases it seems to perform a little bit better, still worse than L2
diff --git a/examples/go/README.md b/examples/go/README.md
index d5d3ac34f0..292ca66114 100644
--- a/examples/go/README.md
+++ b/examples/go/README.md
@@ -24,7 +24,7 @@ export CC=clang
 
 2. Install the Go module:
 ```bash
-go get github.com/rapidsai/cuvs/go@v26.08.00 # 25.02.00 being your desired version, selected from https://github.com/rapidsai/cuvs/tags
+go get github.com/nvidia/cuvs/go@v26.08.00 # v26.08.00 being your desired version, selected from https://github.com/nvidia/cuvs/tags
 ```
 Then you can build your project with the usual `go build`.
 
@@ -36,8 +36,8 @@ Note: The installation will fail if the C libraries are not properly installed a
 package main
 
 import (
-    "github.com/rapidsai/cuvs/go"
-    "github.com/rapidsai/cuvs/go/cagra"
+    "github.com/nvidia/cuvs/go"
+    "github.com/nvidia/cuvs/go/cagra"
 )
 
 func main() {
diff --git a/examples/go/go.mod b/examples/go/go.mod
index 7f034ddfdc..f421ad23bd 100644
--- a/examples/go/go.mod
+++ b/examples/go/go.mod
@@ -4,4 +4,4 @@ go 1.22.4
 
 toolchain go1.23.6
 
-require github.com/rapidsai/cuvs/go v0.0.0-20250313193519-2f9df39e52cc
+require github.com/nvidia/cuvs/go v0.0.0-20250313193519-2f9df39e52cc
diff --git a/examples/go/go.sum b/examples/go/go.sum
index 1f7a1b9c3d..6afe3667f5 100644
--- a/examples/go/go.sum
+++ b/examples/go/go.sum
@@ -1,2 +1,2 @@
-github.com/rapidsai/cuvs/go v0.0.0-20250313193519-2f9df39e52cc h1:KaikmdTcld8icYGg5/5K1U69FTRpXlnYJZPBPFPrV5g=
-github.com/rapidsai/cuvs/go v0.0.0-20250313193519-2f9df39e52cc/go.mod h1:qQPopaJ6Z5DXM+HqtP8TzatknrfiCE7vBf/p1+lVFr8=
+github.com/nvidia/cuvs/go v0.0.0-20250313193519-2f9df39e52cc h1:KaikmdTcld8icYGg5/5K1U69FTRpXlnYJZPBPFPrV5g=
+github.com/nvidia/cuvs/go v0.0.0-20250313193519-2f9df39e52cc/go.mod h1:qQPopaJ6Z5DXM+HqtP8TzatknrfiCE7vBf/p1+lVFr8=
diff --git a/examples/go/main.go b/examples/go/main.go
index d3b45b0eda..8ada2a9154 100644
--- a/examples/go/main.go
+++ b/examples/go/main.go
@@ -6,8 +6,8 @@ import (
 	"math/rand"
 	"time"
 
-	cuvs "github.com/rapidsai/cuvs/go"
-	"github.com/rapidsai/cuvs/go/cagra"
+	cuvs "github.com/nvidia/cuvs/go"
+	"github.com/nvidia/cuvs/go/cagra"
 )
 
 func main() {
diff --git a/fern/docs.yml b/fern/docs.yml
index dd99facae6..629f220578 100644
--- a/fern/docs.yml
+++ b/fern/docs.yml
@@ -71,7 +71,7 @@ experimental:
 navbar-links:
   - type: "secondary"
     text: "GitHub"
-    url: "https://github.com/rapidsai/cuvs"
+    url: "https://github.com/nvidia/cuvs"
   - type: "secondary"
     text: "CUDA-X"
     url: "https://www.nvidia.com/en-us/technologies/cuda-x/"
diff --git a/fern/pages/contributing.md b/fern/pages/contributing.md
index 759a75070f..cb18fcd8dd 100755
--- a/fern/pages/contributing.md
+++ b/fern/pages/contributing.md
@@ -3,7 +3,7 @@
 If you are interested in contributing to NVIDIA cuVS, your contributions will fall
 into three categories:
 1. You want to report a bug, feature request, or documentation issue
-    - File an [issue](https://github.com/rapidsai/cuvs/issues/new/choose)
+    - File an [issue](https://github.com/nvidia/cuvs/issues/new/choose)
     describing what you encountered or what you want to see changed.
     - The RAPIDS team will evaluate the issues and triage them, scheduling
     them for a release. If you believe the issue needs priority attention
@@ -24,13 +24,13 @@ Performance-critical changes require [proper benchmarks](/developer-guide/coding
 
 ### Your first issue
 
-1. Read the project's [README.md](https://github.com/rapidsai/cuvs)
+1. Read the project's [README.md](https://github.com/nvidia/cuvs)
     to learn how to setup the development environment
-2. Find an issue to work on. The best way is to look for the [good first issue](https://github.com/rapidsai/CUVS/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22)
-    or [help wanted](https://github.com/rapidsai/cuvs/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) labels
+2. Find an issue to work on. The best way is to look for the [good first issue](https://github.com/nvidia/cuvs/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22)
+    or [help wanted](https://github.com/nvidia/cuvs/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) labels
 3. Comment on the issue saying you are going to work on it
 4. Code! Make sure to update unit tests!
-5. When done, [create your pull request](https://github.com/rapidsai/cuvs/compare)
+5. When done, [create your pull request](https://github.com/nvidia/cuvs/compare)
 6. Verify that CI passes all [status checks](https://help.github.com/articles/about-status-checks/). Fix if needed
 7. Wait for other developers to review your code and update code as needed
 8. Once reviewed and approved, a RAPIDS developer will merge your pull request
diff --git a/fern/pages/cpp_api/cpp-api-cluster-kmeans.md b/fern/pages/cpp_api/cpp-api-cluster-kmeans.md
index cbb5a73de7..7e53a7ea96 100644
--- a/fern/pages/cpp_api/cpp-api-cluster-kmeans.md
+++ b/fern/pages/cpp_api/cpp-api-cluster-kmeans.md
@@ -126,7 +126,7 @@ raft::host_scalar_view<float> inertia,
 raft::host_scalar_view<int64_t> n_iter);
 ```
 
-TODO: Evaluate replacing the extent type with int64_t. Reference issue: https://github.com/rapidsai/cuvs/issues/1961
+TODO: Evaluate replacing the extent type with int64_t. Reference issue: https://github.com/nvidia/cuvs/issues/1961
 
 This overload supports out-of-core computation where the dataset resides on the host. Data is processed in GPU-sized batches, streaming from host to device. The batch size is controlled by params.streaming_batch_size. In multi-GPU mode, this is a per-rank batch size.
 
diff --git a/fern/pages/cpp_api/cpp-api-neighbors-common.md b/fern/pages/cpp_api/cpp-api-neighbors-common.md
index 68430cf6fb..049252fd40 100644
--- a/fern/pages/cpp_api/cpp-api-neighbors-common.md
+++ b/fern/pages/cpp_api/cpp-api-neighbors-common.md
@@ -103,7 +103,7 @@ Abstract base class for IVF list data.
 
 This allows polymorphic access to list data regardless of the underlying layout.
 
-TODO: Make this struct internal (tracking issue: https://github.com/rapidsai/cuvs/issues/1726)
+TODO: Make this struct internal (tracking issue: https://github.com/nvidia/cuvs/issues/1726)
 
 ```cpp
 template <typename ValueT, typename IdxT, typename SizeT = uint32_t>
diff --git a/fern/pages/cpp_guidelines.md b/fern/pages/cpp_guidelines.md
index 218309843a..aea103d744 100644
--- a/fern/pages/cpp_guidelines.md
+++ b/fern/pages/cpp_guidelines.md
@@ -288,7 +288,7 @@ pre-commit install
 
 ### Core Hooks
 
-C++ and CUDA code are formatted with [clang-format](https://clang.llvm.org/docs/ClangFormat.html). NVIDIA cuVS follows the Google C++ style with a few local adjustments documented in [cpp/.clang-format](https://github.com/rapidsai/cuvs/blob/main/cpp/.clang-format):
+C++ and CUDA code are formatted with [clang-format](https://clang.llvm.org/docs/ClangFormat.html). NVIDIA cuVS follows the Google C++ style with a few local adjustments documented in [cpp/.clang-format](https://github.com/nvidia/cuvs/blob/main/cpp/.clang-format):
 
 1. Empty functions, records, and namespaces are not split.
 2. Indentation is two spaces, including line continuations.
diff --git a/fern/pages/cuvs_bench/datasets.md b/fern/pages/cuvs_bench/datasets.md
index 3b9a1a9bc3..c1f8c52e07 100644
--- a/fern/pages/cuvs_bench/datasets.md
+++ b/fern/pages/cuvs_bench/datasets.md
@@ -96,7 +96,7 @@ For billion-scale sources that provide ground truth for only the first 10M or 10
 
 ## Dataset configurations
 
-Each benchmark dataset needs a YAML descriptor with file names and basic properties. Common descriptors are available in [datasets.yaml](https://github.com/rapidsai/cuvs/blob/branch-25.04/python/cuvs_bench/cuvs_bench/config/datasets/datasets.yaml).
+Each benchmark dataset needs a YAML descriptor with file names and basic properties. Common descriptors are available in [datasets.yaml](https://github.com/nvidia/cuvs/blob/branch-25.04/python/cuvs_bench/cuvs_bench/config/datasets/datasets.yaml).
 
 The default `${CUVS_HOME}/python/cuvs_bench/cuvs_bench/config/datasets/datasets.yaml` includes entries like:
 
diff --git a/fern/pages/index.md b/fern/pages/index.md
index 205e2c9837..2660c8767c 100644
--- a/fern/pages/index.md
+++ b/fern/pages/index.md
@@ -6,11 +6,11 @@ NVIDIA cuVS is a GPU-accelerated library for vector search on the GPU. Vector se
 
 [cuvs_reference]: https://docs.rapids.ai/api/cuvs/stable/
 
-- [Example Notebooks](https://github.com/rapidsai/cuvs/tree/HEAD/notebooks): Example notebooks
-- [Code Examples](https://github.com/rapidsai/cuvs/tree/HEAD/examples): Self-contained code examples
+- [Example Notebooks](https://github.com/nvidia/cuvs/tree/HEAD/notebooks): Example notebooks
+- [Code Examples](https://github.com/nvidia/cuvs/tree/HEAD/examples): Self-contained code examples
 - [RAPIDS Community](https://rapids.ai/community.html): Get help, contribute, and collaborate.
-- [GitHub repository](https://github.com/rapidsai/cuvs): Download the NVIDIA cuVS source code.
-- [Issue tracker](https://github.com/rapidsai/cuvs/issues): Report issues or request features.
+- [GitHub repository](https://github.com/nvidia/cuvs): Download the NVIDIA cuVS source code.
+- [Issue tracker](https://github.com/nvidia/cuvs/issues): Report issues or request features.
 
 ## What is NVIDIA cuVS?
 
diff --git a/fern/pages/installation/go.md b/fern/pages/installation/go.md
index 700c6e6f4a..ea0d37c9b1 100644
--- a/fern/pages/installation/go.md
+++ b/fern/pages/installation/go.md
@@ -31,7 +31,7 @@ export CC=clang
 Add the Go module to your project:
 
 ```bash
-go get github.com/rapidsai/cuvs/go@v26.06.00
+go get github.com/nvidia/cuvs/go@v26.06.00
 ```
 
 Then build your project with the usual Go tooling:
diff --git a/fern/pages/integrations.md b/fern/pages/integrations.md
index 5ba3fc9a4e..b587612356 100644
--- a/fern/pages/integrations.md
+++ b/fern/pages/integrations.md
@@ -90,4 +90,4 @@ Use these integrations when you want library-level control inside an application
 
 ### Lucene
 
-[NVIDIA cuVS Lucene](https://github.com/rapidsai/cuvs-lucene) provides a Lucene `KnnVectorFormat` that lets Java search applications use NVIDIA cuVS through Lucene codecs. The package is published as [`com.nvidia.cuvs.lucene:cuvs-lucene`](https://central.sonatype.com/artifact/com.nvidia.cuvs.lucene/cuvs-lucene) and builds on the NVIDIA cuVS Java APIs. The integration targets GPU-accelerated vector indexing and search paths for Lucene-based systems, including CAGRA graph construction, filtering, index merge support, and off-heap data movement. See the SearchScale and NVIDIA writeup on [Apache Lucene accelerated with NVIDIA cuVS](https://searchscale.com/blog/apache-lucene-accelerated-with-nvidia-cuvs-25.06-release/).
+[NVIDIA cuVS Lucene](https://github.com/nvidia/cuvs-lucene) provides a Lucene `KnnVectorFormat` that lets Java search applications use NVIDIA cuVS through Lucene codecs. The package is published as [`com.nvidia.cuvs.lucene:cuvs-lucene`](https://central.sonatype.com/artifact/com.nvidia.cuvs.lucene/cuvs-lucene) and builds on the NVIDIA cuVS Java APIs. The integration targets GPU-accelerated vector indexing and search paths for Lucene-based systems, including CAGRA graph construction, filtering, index merge support, and off-heap data movement. See the SearchScale and NVIDIA writeup on [Apache Lucene accelerated with NVIDIA cuVS](https://searchscale.com/blog/apache-lucene-accelerated-with-nvidia-cuvs-25.06-release/).
diff --git a/fern/pages/neighbors/bruteforce.md b/fern/pages/neighbors/bruteforce.md
index 70c22eceb5..815ab306d4 100644
--- a/fern/pages/neighbors/bruteforce.md
+++ b/fern/pages/neighbors/bruteforce.md
@@ -102,8 +102,8 @@ fn build_brute_force_index(dataset: &ndarray::Array2<f32>) -> Result<Index> {
 package main
 
 import (
-	cuvs "github.com/rapidsai/cuvs/go"
-	"github.com/rapidsai/cuvs/go/brute_force"
+	cuvs "github.com/nvidia/cuvs/go"
+	"github.com/nvidia/cuvs/go/brute_force"
 )
 
 func buildBruteForceIndex(dataset cuvs.Tensor[float32]) (*brute_force.BruteForceIndex, error) {
diff --git a/fern/pages/neighbors/cagra.md b/fern/pages/neighbors/cagra.md
index c1eb80faea..05a1ed622e 100644
--- a/fern/pages/neighbors/cagra.md
+++ b/fern/pages/neighbors/cagra.md
@@ -106,8 +106,8 @@ fn build_cagra_index(dataset: &ndarray::Array2<f32>) -> Result<Index> {
 package main
 
 import (
-	cuvs "github.com/rapidsai/cuvs/go"
-	"github.com/rapidsai/cuvs/go/cagra"
+	cuvs "github.com/nvidia/cuvs/go"
+	"github.com/nvidia/cuvs/go/cagra"
 )
 
 func buildCagraIndex(dataset cuvs.Tensor[float32]) (*cagra.CagraIndex, error) {
@@ -204,8 +204,8 @@ index = cagra.extend(cagra.ExtendParams(), index, additional_dataset)
 package main
 
 import (
-	cuvs "github.com/rapidsai/cuvs/go"
-	"github.com/rapidsai/cuvs/go/cagra"
+	cuvs "github.com/nvidia/cuvs/go"
+	"github.com/nvidia/cuvs/go/cagra"
 )
 
 func extendCagraIndex(
diff --git a/fern/pages/neighbors/ivfflat.md b/fern/pages/neighbors/ivfflat.md
index ea4127b6ed..30c83311de 100644
--- a/fern/pages/neighbors/ivfflat.md
+++ b/fern/pages/neighbors/ivfflat.md
@@ -97,8 +97,8 @@ fn build_ivf_flat_index(dataset: &ndarray::Array2<f32>) -> Result<Index> {
 package main
 
 import (
-	cuvs "github.com/rapidsai/cuvs/go"
-	"github.com/rapidsai/cuvs/go/ivf_flat"
+	cuvs "github.com/nvidia/cuvs/go"
+	"github.com/nvidia/cuvs/go/ivf_flat"
 )
 
 func buildIvfFlatIndex(dataset cuvs.Tensor[float32]) (*ivf_flat.IvfFlatIndex, error) {
diff --git a/fern/pages/neighbors/ivfpq.md b/fern/pages/neighbors/ivfpq.md
index 8a2db90afc..d50a77f5ce 100644
--- a/fern/pages/neighbors/ivfpq.md
+++ b/fern/pages/neighbors/ivfpq.md
@@ -110,8 +110,8 @@ fn build_ivf_pq_index(dataset: &ndarray::Array2<f32>) -> Result<Index> {
 package main
 
 import (
-	cuvs "github.com/rapidsai/cuvs/go"
-	"github.com/rapidsai/cuvs/go/ivf_pq"
+	cuvs "github.com/nvidia/cuvs/go"
+	"github.com/nvidia/cuvs/go/ivf_pq"
 )
 
 func buildIvfPqIndex(dataset cuvs.Tensor[float32]) (*ivf_pq.IvfPqIndex, error) {
diff --git a/fern/pages/other/memory_management.md b/fern/pages/other/memory_management.md
index 90e545d26f..e6776f4ee0 100644
--- a/fern/pages/other/memory_management.md
+++ b/fern/pages/other/memory_management.md
@@ -132,7 +132,7 @@ fn main() -> Result<(), &'static str> {
 ```go
 package main
 
-import cuvs "github.com/rapidsai/cuvs/go"
+import cuvs "github.com/nvidia/cuvs/go"
 
 func main() error {
 	mem, err := cuvs.NewCuvsPoolMemory(50, 90, false)
@@ -411,7 +411,7 @@ fn main() -> Result<(), &'static str> {
 ```go
 package main
 
-import cuvs "github.com/rapidsai/cuvs/go"
+import cuvs "github.com/nvidia/cuvs/go"
 
 func main() error {
 	mem, err := cuvs.NewCuvsPoolMemory(50, 90, true)
diff --git a/fern/pages/other/multidimensional_arrays.md b/fern/pages/other/multidimensional_arrays.md
index b87e90691e..c5f7ec7d5e 100644
--- a/fern/pages/other/multidimensional_arrays.md
+++ b/fern/pages/other/multidimensional_arrays.md
@@ -200,7 +200,7 @@ fn allocate_device_matrix(n_rows: usize, n_features: usize) -> Result<ManagedTen
 ```go
 package main
 
-import cuvs "github.com/rapidsai/cuvs/go"
+import cuvs "github.com/nvidia/cuvs/go"
 
 func allocateDeviceMatrix(nRows int64, nFeatures int64) error {
 	resource, err := cuvs.NewResource(nil)
@@ -330,8 +330,8 @@ fn build_index(dataset: &ndarray::Array2<f32>) -> Result<Index> {
 package main
 
 import (
-	cuvs "github.com/rapidsai/cuvs/go"
-	"github.com/rapidsai/cuvs/go/brute_force"
+	cuvs "github.com/nvidia/cuvs/go"
+	"github.com/nvidia/cuvs/go/brute_force"
 )
 
 func buildIndex(data [][]float32) (*brute_force.BruteForceIndex, error) {
@@ -505,8 +505,8 @@ fn search(
 package main
 
 import (
-	cuvs "github.com/rapidsai/cuvs/go"
-	"github.com/rapidsai/cuvs/go/brute_force"
+	cuvs "github.com/nvidia/cuvs/go"
+	"github.com/nvidia/cuvs/go/brute_force"
 )
 
 func searchWithOutputs(
diff --git a/fern/pages/other/resources.md b/fern/pages/other/resources.md
index 6a7562c0bb..6a3582fde9 100644
--- a/fern/pages/other/resources.md
+++ b/fern/pages/other/resources.md
@@ -141,7 +141,7 @@ fn run() -> Result<()> {
 ```go
 package main
 
-import cuvs "github.com/rapidsai/cuvs/go"
+import cuvs "github.com/nvidia/cuvs/go"
 
 func main() error {
 	resource, err := cuvs.NewResource(nil)
@@ -230,7 +230,7 @@ fn run() -> Result<()> {
 ```go
 package main
 
-import cuvs "github.com/rapidsai/cuvs/go"
+import cuvs "github.com/nvidia/cuvs/go"
 
 func syncWork(resource cuvs.Resource) error {
 	// Call cuVS Go APIs with resource first.
@@ -416,7 +416,7 @@ fn configure_stream(stream: cudaStream_t) -> cuvs::Result<Resources> {
 package main
 
 import "C"
-import cuvs "github.com/rapidsai/cuvs/go"
+import cuvs "github.com/nvidia/cuvs/go"
 
 func newResourceOnStream(stream C.cudaStream_t) (cuvs.Resource, error) {
 	return cuvs.NewResource(stream)
diff --git a/go/brute_force/brute_force.go b/go/brute_force/brute_force.go
index c75447b99b..894ac8df93 100644
--- a/go/brute_force/brute_force.go
+++ b/go/brute_force/brute_force.go
@@ -7,7 +7,7 @@ import (
 	"errors"
 	"unsafe"
 
-	cuvs "github.com/rapidsai/cuvs/go"
+	cuvs "github.com/nvidia/cuvs/go"
 )
 
 // Brute Force KNN Index
diff --git a/go/brute_force/brute_force_test.go b/go/brute_force/brute_force_test.go
index ba9ac898f5..9525cc3691 100644
--- a/go/brute_force/brute_force_test.go
+++ b/go/brute_force/brute_force_test.go
@@ -4,7 +4,7 @@ import (
 	"math/rand/v2"
 	"testing"
 
-	cuvs "github.com/rapidsai/cuvs/go"
+	cuvs "github.com/nvidia/cuvs/go"
 )
 
 func TestBruteForce(t *testing.T) {
diff --git a/go/cagra/cagra.go b/go/cagra/cagra.go
index c68e85e956..2680999fd9 100644
--- a/go/cagra/cagra.go
+++ b/go/cagra/cagra.go
@@ -7,7 +7,7 @@ import (
 	"errors"
 	"unsafe"
 
-	cuvs "github.com/rapidsai/cuvs/go"
+	cuvs "github.com/nvidia/cuvs/go"
 )
 
 // Cagra ANN Index
diff --git a/go/cagra/cagra_test.go b/go/cagra/cagra_test.go
index bb4fd0a0a1..833f15ab75 100644
--- a/go/cagra/cagra_test.go
+++ b/go/cagra/cagra_test.go
@@ -4,7 +4,7 @@ import (
 	"math/rand/v2"
 	"testing"
 
-	cuvs "github.com/rapidsai/cuvs/go"
+	cuvs "github.com/nvidia/cuvs/go"
 )
 
 func TestCagra(t *testing.T) {
diff --git a/go/cagra/extend_params.go b/go/cagra/extend_params.go
index 7e173c0456..ccb09919ed 100644
--- a/go/cagra/extend_params.go
+++ b/go/cagra/extend_params.go
@@ -4,7 +4,7 @@ package cagra
 import "C"
 
 import (
-	cuvs "github.com/rapidsai/cuvs/go"
+	cuvs "github.com/nvidia/cuvs/go"
 )
 
 // Parameters to extend CAGRA Index
diff --git a/go/cagra/index_params.go b/go/cagra/index_params.go
index 99f4b70b93..9b9ccd1a27 100644
--- a/go/cagra/index_params.go
+++ b/go/cagra/index_params.go
@@ -6,7 +6,7 @@ import "C"
 import (
 	"errors"
 
-	cuvs "github.com/rapidsai/cuvs/go"
+	cuvs "github.com/nvidia/cuvs/go"
 )
 
 type IndexParams struct {
diff --git a/go/cagra/search_params.go b/go/cagra/search_params.go
index 94207d2881..763b4f0272 100644
--- a/go/cagra/search_params.go
+++ b/go/cagra/search_params.go
@@ -6,7 +6,7 @@ import "C"
 import (
 	"errors"
 
-	cuvs "github.com/rapidsai/cuvs/go"
+	cuvs "github.com/nvidia/cuvs/go"
 )
 
 // Supplemental parameters to search CAGRA Index
diff --git a/go/distance_test.go b/go/distance_test.go
index 17f40a1ba2..b8652f7c56 100644
--- a/go/distance_test.go
+++ b/go/distance_test.go
@@ -5,7 +5,7 @@ import (
 	"testing"
 	"time"
 
-	cuvs "github.com/rapidsai/cuvs/go"
+	cuvs "github.com/nvidia/cuvs/go"
 )
 
 func TestDistance(t *testing.T) {
diff --git a/go/dlpack_test.go b/go/dlpack_test.go
index 135363f811..8d88d9ea51 100644
--- a/go/dlpack_test.go
+++ b/go/dlpack_test.go
@@ -6,7 +6,7 @@ import (
 	"testing"
 	"time"
 
-	cuvs "github.com/rapidsai/cuvs/go"
+	cuvs "github.com/nvidia/cuvs/go"
 )
 
 func TestDlPack(t *testing.T) {
diff --git a/go/go.mod b/go/go.mod
index 4e1ab39a44..cc758e0d4b 100644
--- a/go/go.mod
+++ b/go/go.mod
@@ -1,3 +1,3 @@
-module github.com/rapidsai/cuvs/go
+module github.com/nvidia/cuvs/go
 
 go 1.22.4
diff --git a/go/ivf_flat/index_params.go b/go/ivf_flat/index_params.go
index f6cbe60fbf..b3651c43d7 100644
--- a/go/ivf_flat/index_params.go
+++ b/go/ivf_flat/index_params.go
@@ -6,7 +6,7 @@ import "C"
 import (
 	"errors"
 
-	cuvs "github.com/rapidsai/cuvs/go"
+	cuvs "github.com/nvidia/cuvs/go"
 )
 
 // Supplemental parameters to build IVF Flat Index
diff --git a/go/ivf_flat/ivf_flat.go b/go/ivf_flat/ivf_flat.go
index 79000978ce..d145a898a0 100644
--- a/go/ivf_flat/ivf_flat.go
+++ b/go/ivf_flat/ivf_flat.go
@@ -7,7 +7,7 @@ import (
 	"errors"
 	"unsafe"
 
-	cuvs "github.com/rapidsai/cuvs/go"
+	cuvs "github.com/nvidia/cuvs/go"
 )
 
 // IVF Flat Index
diff --git a/go/ivf_flat/ivf_flat_test.go b/go/ivf_flat/ivf_flat_test.go
index 92ffdc715f..3bf888d8b1 100644
--- a/go/ivf_flat/ivf_flat_test.go
+++ b/go/ivf_flat/ivf_flat_test.go
@@ -4,7 +4,7 @@ import (
 	"math/rand/v2"
 	"testing"
 
-	cuvs "github.com/rapidsai/cuvs/go"
+	cuvs "github.com/nvidia/cuvs/go"
 )
 
 func TestIvfFlat(t *testing.T) {
diff --git a/go/ivf_flat/search_params.go b/go/ivf_flat/search_params.go
index 3606e4662e..99419cd638 100644
--- a/go/ivf_flat/search_params.go
+++ b/go/ivf_flat/search_params.go
@@ -4,7 +4,7 @@ package ivf_flat
 import "C"
 
 import (
-	cuvs "github.com/rapidsai/cuvs/go"
+	cuvs "github.com/nvidia/cuvs/go"
 )
 
 type SearchParams struct {
diff --git a/go/ivf_pq/index_params.go b/go/ivf_pq/index_params.go
index b35486061e..696b586f72 100644
--- a/go/ivf_pq/index_params.go
+++ b/go/ivf_pq/index_params.go
@@ -6,7 +6,7 @@ import "C"
 import (
 	"errors"
 
-	cuvs "github.com/rapidsai/cuvs/go"
+	cuvs "github.com/nvidia/cuvs/go"
 )
 
 type IndexParams struct {
diff --git a/go/ivf_pq/ivf_pq.go b/go/ivf_pq/ivf_pq.go
index cbbec629d1..8742aae8a9 100644
--- a/go/ivf_pq/ivf_pq.go
+++ b/go/ivf_pq/ivf_pq.go
@@ -7,7 +7,7 @@ import (
 	"errors"
 	"unsafe"
 
-	cuvs "github.com/rapidsai/cuvs/go"
+	cuvs "github.com/nvidia/cuvs/go"
 )
 
 // IVF PQ Index
diff --git a/go/ivf_pq/ivf_pq_test.go b/go/ivf_pq/ivf_pq_test.go
index 39b7727a3d..5d79d3c5db 100644
--- a/go/ivf_pq/ivf_pq_test.go
+++ b/go/ivf_pq/ivf_pq_test.go
@@ -4,7 +4,7 @@ import (
 	"math/rand/v2"
 	"testing"
 
-	cuvs "github.com/rapidsai/cuvs/go"
+	cuvs "github.com/nvidia/cuvs/go"
 )
 
 func TestIvfPq(t *testing.T) {
diff --git a/go/ivf_pq/search_params.go b/go/ivf_pq/search_params.go
index 190ca7036e..bd3602eb86 100644
--- a/go/ivf_pq/search_params.go
+++ b/go/ivf_pq/search_params.go
@@ -6,7 +6,7 @@ import "C"
 import (
 	"errors"
 
-	cuvs "github.com/rapidsai/cuvs/go"
+	cuvs "github.com/nvidia/cuvs/go"
 )
 
 // Supplemental parameters to search IVF PQ Index
diff --git a/go/memory_resource_test.go b/go/memory_resource_test.go
index e44c20485e..89e7e52cec 100644
--- a/go/memory_resource_test.go
+++ b/go/memory_resource_test.go
@@ -3,7 +3,7 @@ package cuvs_test
 import (
 	"testing"
 
-	cuvs "github.com/rapidsai/cuvs/go"
+	cuvs "github.com/nvidia/cuvs/go"
 )
 
 func TestMemoryResource(t *testing.T) {
diff --git a/java/cuvs-java/pom.xml b/java/cuvs-java/pom.xml
index 246a6bcb5a..f426a1e9a6 100644
--- a/java/cuvs-java/pom.xml
+++ b/java/cuvs-java/pom.xml
@@ -26,10 +26,10 @@ SPDX-License-Identifier: Apache-2.0
         </license>
     </licenses>
     <scm>
-        <connection>scm:git:https://github.com/rapidsai/cuvs.git</connection>
+        <connection>scm:git:https://github.com/nvidia/cuvs.git</connection>
-        <developerConnection>scm:git:git@github.com:rapidsai/cuvs.git</developerConnection>
+        <developerConnection>scm:git:git@github.com:nvidia/cuvs.git</developerConnection>
         <tag>HEAD</tag>
-        <url>https://github.com/rapidsai/cuvs</url>
+        <url>https://github.com/nvidia/cuvs</url>
     </scm>
     <developers>
         <developer>
diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedIT.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedIT.java
index ed83f94ffe..0fd69ddb40 100644
--- a/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedIT.java
+++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedIT.java
@@ -35,7 +35,7 @@ enum TestDatasetMemoryKind {
     DEVICE
   }
 
-  @Ignore // https://github.com/rapidsai/cuvs/issues/1468
+  @Ignore // https://github.com/nvidia/cuvs/issues/1468
   @Test
   public void testResultsTopKWithRandomValues() throws Throwable {
     TestDatasetMemoryKind[] testDatasetMemoryKinds = {
diff --git a/notebooks/VectorSearch_QuestionRetrieval.ipynb b/notebooks/VectorSearch_QuestionRetrieval.ipynb
index d93f1e8fd3..211852d6c3 100644
--- a/notebooks/VectorSearch_QuestionRetrieval.ipynb
+++ b/notebooks/VectorSearch_QuestionRetrieval.ipynb
@@ -8,7 +8,7 @@
     "\n",
     "# Similar Questions Retrieval\n",
     "\n",
-    "This notebook is inspired by the [similar search example of Sentence-Transformers](https://www.sbert.net/examples/applications/semantic-search/README.html#similar-questions-retrieval), and adapted to support [cuVS ANN](https://github.com/rapidsai/cuvs) algorithm.\n",
+    "This notebook is inspired by the [similar search example of Sentence-Transformers](https://www.sbert.net/examples/applications/semantic-search/README.html#similar-questions-retrieval), and adapted to support [cuVS ANN](https://github.com/nvidia/cuvs) algorithm.\n",
     "\n",
     "The model was pre-trained on the [Natural Questions dataset](https://ai.google.com/research/NaturalQuestions). It consists of about 100k real Google search queries, together with an annotated passage from Wikipedia that provides the answer. It is an example of an asymmetric search task. As corpus, we use the smaller [Simple English Wikipedia](http://sbert.net/datasets/simplewiki-2020-11-01.jsonl.gz) so that it fits easily into memory.\n",
     "\n",
diff --git a/python/cuvs/cuvs/tests/test_doctests.py b/python/cuvs/cuvs/tests/test_doctests.py
index a73f648046..1deb02e3e6 100644
--- a/python/cuvs/cuvs/tests/test_doctests.py
+++ b/python/cuvs/cuvs/tests/test_doctests.py
@@ -21,7 +21,7 @@
 
 def _name_in_all(parent, name):
     # Skip multi-GPU (mg) modules - they require special multi-GPU setup
-    # See: https://github.com/rapidsai/cuvs/issues/1647
+    # See: https://github.com/nvidia/cuvs/issues/1647
     if name == "mg" or name == "mg_resources" or name == "MultiGpuResources":
         return False
     return name in getattr(parent, "__all__", [])
diff --git a/python/cuvs/cuvs/tests/test_vamana.py b/python/cuvs/cuvs/tests/test_vamana.py
index f692e4ae69..14f6c2a344 100644
--- a/python/cuvs/cuvs/tests/test_vamana.py
+++ b/python/cuvs/cuvs/tests/test_vamana.py
@@ -40,7 +40,7 @@ def test_vamana_build_basic(dtype):
 @pytest.mark.skip(
     reason="Skipping host build test because of CUDA error "
     "in C++ API. Reference issue: "
-    "https://github.com/rapidsai/cuvs/issues/1380"
+    "https://github.com/nvidia/cuvs/issues/1380"
 )
 def test_vamana_build_basic_host(dtype):
     n_rows, n_cols = 512, 12
diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml
index edf323bf9e..65dc1f02ae 100644
--- a/python/cuvs/pyproject.toml
+++ b/python/cuvs/pyproject.toml
@@ -43,7 +43,7 @@ test = [
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 
 [project.urls]
-Homepage = "https://github.com/rapidsai/cuvs"
+Homepage = "https://github.com/nvidia/cuvs"
 Documentation = "https://docs.rapids.ai/api/cuvs/stable/"
 
 [tool.isort]
diff --git a/python/cuvs_bench/cuvs_bench/plot/__main__.py b/python/cuvs_bench/cuvs_bench/plot/__main__.py
index aca08505ea..0f40f3ac0e 100644
--- a/python/cuvs_bench/cuvs_bench/plot/__main__.py
+++ b/python/cuvs_bench/cuvs_bench/plot/__main__.py
@@ -6,7 +6,7 @@
 # 1: https://github.com/erikbern/ann-benchmarks/blob/main/plot.py
 # 2: https://github.com/erikbern/ann-benchmarks/blob/main/ann_benchmarks/plotting/utils.py  # noqa: E501
 # 3: https://github.com/erikbern/ann-benchmarks/blob/main/ann_benchmarks/plotting/metrics.py  # noqa: E501
-# License: https://github.com/rapidsai/cuvs/blob/main/thirdparty/LICENSES/LICENSE.ann-benchmark # noqa: E501
+# License: https://github.com/nvidia/cuvs/blob/main/thirdparty/LICENSES/LICENSE.ann-benchmark # noqa: E501
 
 import itertools
 import os
diff --git a/python/cuvs_bench/pyproject.toml b/python/cuvs_bench/pyproject.toml
index 8df6955978..895c4cc9ef 100644
--- a/python/cuvs_bench/pyproject.toml
+++ b/python/cuvs_bench/pyproject.toml
@@ -44,7 +44,7 @@ opensearch = [
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 
 [project.urls]
-Homepage = "https://github.com/rapidsai/cuvs"
+Homepage = "https://github.com/nvidia/cuvs"
 
 [tool.setuptools.package-data]
 "*" = ["*.*", "VERSION"]
diff --git a/python/libcuvs/pyproject.toml b/python/libcuvs/pyproject.toml
index 5025daa66d..2d73ace053 100644
--- a/python/libcuvs/pyproject.toml
+++ b/python/libcuvs/pyproject.toml
@@ -29,7 +29,7 @@ classifiers = [
 ]
 
 [project.urls]
-Homepage = "https://github.com/rapidsai/cuvs"
+Homepage = "https://github.com/nvidia/cuvs"
 Documentation = "https://docs.rapids.ai/api/cuvs/stable/"
 
 [project.entry-points."cmake.prefix"]
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index e5b6809ff9..ada4227f6b 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -11,8 +11,8 @@ resolver = "2"
 [workspace.package]
 version = "26.8.0"
 edition = "2024"
-repository = "https://github.com/rapidsai/cuvs"
-homepage = "https://github.com/rapidsai/cuvs"
+repository = "https://github.com/nvidia/cuvs"
+homepage = "https://github.com/nvidia/cuvs"
 description = "Vector Search and Clustering on the GPU"
 authors = ["NVIDIA Corporation"]
 license = "Apache-2.0"

From 3b89c3c749c230fa7a696258b60584b365e1bb14 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Thu, 25 Jun 2026 12:31:14 -0400
Subject: [PATCH 10/10] Fix style after repository URL update

---
 ci/test_java.sh                               |   2 +-
 .../ann_cagra/bug_iterative_cagra_build.cu    | 168 +++++++++---------
 cpp/tests/neighbors/ann_ivf_flat.cuh          |   2 +-
 .../com/nvidia/cuvs/CagraRandomizedIT.java    |   2 +-
 python/cuvs/cuvs/tests/test_vamana.py         |   2 +-
 python/cuvs_bench/cuvs_bench/plot/__main__.py |   2 +-
 6 files changed, 89 insertions(+), 89 deletions(-)

diff --git a/ci/test_java.sh b/ci/test_java.sh
index 7cdd9493c7..18b0b3d08d 100755
--- a/ci/test_java.sh
+++ b/ci/test_java.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 set -euo pipefail
diff --git a/cpp/tests/neighbors/ann_cagra/bug_iterative_cagra_build.cu b/cpp/tests/neighbors/ann_cagra/bug_iterative_cagra_build.cu
index 00af4987d3..defbd9bde4 100644
--- a/cpp/tests/neighbors/ann_cagra/bug_iterative_cagra_build.cu
+++ b/cpp/tests/neighbors/ann_cagra/bug_iterative_cagra_build.cu
@@ -1,85 +1,85 @@
-/*
- * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
- * SPDX-License-Identifier: Apache-2.0
- */
-
-#include <gtest/gtest.h>
-
-#include <cuvs/neighbors/cagra.hpp>
-
-#include <raft/core/device_mdarray.hpp>
-#include <raft/core/device_resources.hpp>
-#include <raft/random/rng.cuh>
-
-#include <cstdint>
-#include <type_traits>
-
-namespace cuvs::neighbors::cagra {
-
-template <typename DataT>
-class CagraIterativeBuildBugTest : public ::testing::Test {
- public:
-  using data_type = DataT;
-
- protected:
-  void run()
-  {
-    // Set up iterative CAGRA graph building
-    cagra::index_params index_params;
-    // The bug manifests when graph_degree is equal to intermediate_graph_degree
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <gtest/gtest.h>
+
+#include <cuvs/neighbors/cagra.hpp>
+
+#include <raft/core/device_mdarray.hpp>
+#include <raft/core/device_resources.hpp>
+#include <raft/random/rng.cuh>
+
+#include <cstdint>
+#include <type_traits>
+
+namespace cuvs::neighbors::cagra {
+
+template <typename DataT>
+class CagraIterativeBuildBugTest : public ::testing::Test {
+ public:
+  using data_type = DataT;
+
+ protected:
+  void run()
+  {
+    // Set up iterative CAGRA graph building
+    cagra::index_params index_params;
+    // The bug manifests when graph_degree is equal to intermediate_graph_degree
     // see issue https://github.com/nvidia/cuvs/issues/1818
-    index_params.graph_degree              = 16;
-    index_params.intermediate_graph_degree = 16;
-
-    // Use iterative CAGRA search for graph building
-    index_params.graph_build_params = graph_build_params::iterative_search_params();
-
-    // Build the index
-    auto cagra_index = cagra::build(res, index_params, raft::make_const_mdspan(dataset->view()));
-    raft::resource::sync_stream(res);
-
-    // Verify the index was built successfully
-    ASSERT_GT(cagra_index.size(), 0);
-    ASSERT_EQ(cagra_index.dim(), n_dim);
-  }
-
-  void SetUp() override
-  {
-    dataset.emplace(raft::make_device_matrix<data_type, int64_t>(res, n_samples, n_dim));
-    raft::random::RngState r(1234ULL);
-
-    // Generate random data based on type
-    if constexpr (std::is_same_v<data_type, float>) {
-      raft::random::normal(
-        res, r, dataset->data_handle(), n_samples * n_dim, data_type(0), data_type(1));
-    } else if constexpr (std::is_same_v<data_type, int8_t>) {
-      raft::random::uniformInt(
-        res, r, dataset->data_handle(), n_samples * n_dim, int8_t(-128), int8_t(127));
-    } else if constexpr (std::is_same_v<data_type, uint8_t>) {
-      raft::random::uniformInt(
-        res, r, dataset->data_handle(), n_samples * n_dim, uint8_t(0), uint8_t(255));
-    }
-    raft::resource::sync_stream(res);
-  }
-
-  void TearDown() override
-  {
-    dataset.reset();
-    raft::resource::sync_stream(res);
-  }
-
- private:
-  raft::resources res;
-  std::optional<raft::device_matrix<data_type, int64_t>> dataset = std::nullopt;
-
-  constexpr static int64_t n_samples = 10000;
-  constexpr static int64_t n_dim     = 1024;
-};
-
-// Instantiate test for different data types
-using TestTypes = ::testing::Types<float, int8_t, uint8_t>;
-TYPED_TEST_SUITE(CagraIterativeBuildBugTest, TestTypes);
-
-TYPED_TEST(CagraIterativeBuildBugTest, IterativeBuildTest) { this->run(); }
-
-}  // namespace cuvs::neighbors::cagra
+    index_params.graph_degree              = 16;
+    index_params.intermediate_graph_degree = 16;
+
+    // Use iterative CAGRA search for graph building
+    index_params.graph_build_params = graph_build_params::iterative_search_params();
+
+    // Build the index
+    auto cagra_index = cagra::build(res, index_params, raft::make_const_mdspan(dataset->view()));
+    raft::resource::sync_stream(res);
+
+    // Verify the index was built successfully
+    ASSERT_GT(cagra_index.size(), 0);
+    ASSERT_EQ(cagra_index.dim(), n_dim);
+  }
+
+  void SetUp() override
+  {
+    dataset.emplace(raft::make_device_matrix<data_type, int64_t>(res, n_samples, n_dim));
+    raft::random::RngState r(1234ULL);
+
+    // Generate random data based on type
+    if constexpr (std::is_same_v<data_type, float>) {
+      raft::random::normal(
+        res, r, dataset->data_handle(), n_samples * n_dim, data_type(0), data_type(1));
+    } else if constexpr (std::is_same_v<data_type, int8_t>) {
+      raft::random::uniformInt(
+        res, r, dataset->data_handle(), n_samples * n_dim, int8_t(-128), int8_t(127));
+    } else if constexpr (std::is_same_v<data_type, uint8_t>) {
+      raft::random::uniformInt(
+        res, r, dataset->data_handle(), n_samples * n_dim, uint8_t(0), uint8_t(255));
+    }
+    raft::resource::sync_stream(res);
+  }
+
+  void TearDown() override
+  {
+    dataset.reset();
+    raft::resource::sync_stream(res);
+  }
+
+ private:
+  raft::resources res;
+  std::optional<raft::device_matrix<data_type, int64_t>> dataset = std::nullopt;
+
+  constexpr static int64_t n_samples = 10000;
+  constexpr static int64_t n_dim     = 1024;
+};
+
+// Instantiate test for different data types
+using TestTypes = ::testing::Types<float, int8_t, uint8_t>;
+TYPED_TEST_SUITE(CagraIterativeBuildBugTest, TestTypes);
+
+TYPED_TEST(CagraIterativeBuildBugTest, IterativeBuildTest) { this->run(); }
+
+}  // namespace cuvs::neighbors::cagra
diff --git a/cpp/tests/neighbors/ann_ivf_flat.cuh b/cpp/tests/neighbors/ann_ivf_flat.cuh
index 88b6d168d8..aba874e888 100644
--- a/cpp/tests/neighbors/ann_ivf_flat.cuh
+++ b/cpp/tests/neighbors/ann_ivf_flat.cuh
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 #pragma once
diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedIT.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedIT.java
index 0fd69ddb40..e4a0f25e1d 100644
--- a/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedIT.java
+++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedIT.java
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 package com.nvidia.cuvs;
diff --git a/python/cuvs/cuvs/tests/test_vamana.py b/python/cuvs/cuvs/tests/test_vamana.py
index 14f6c2a344..5b8a2a9d7a 100644
--- a/python/cuvs/cuvs/tests/test_vamana.py
+++ b/python/cuvs/cuvs/tests/test_vamana.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 import numpy as np
diff --git a/python/cuvs_bench/cuvs_bench/plot/__main__.py b/python/cuvs_bench/cuvs_bench/plot/__main__.py
index 0f40f3ac0e..e13276488d 100644
--- a/python/cuvs_bench/cuvs_bench/plot/__main__.py
+++ b/python/cuvs_bench/cuvs_bench/plot/__main__.py
@@ -1,5 +1,5 @@
 #
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 # This script is inspired by