From d7713cfd1c0d1131ef6e92d3738678a0ba37c707 Mon Sep 17 00:00:00 2001 From: Quentin Blampey Date: Tue, 10 Feb 2026 12:16:19 +0100 Subject: [PATCH 1/3] simplify how distances are computed to make spatial_neighbors faster --- CHANGELOG.md | 5 +++++ novae/utils/build.py | 13 +++---------- tests/test_neighbors.py | 37 +++++++++++++++++++++++++++++++++++++ uv.lock | 14 +++++++------- 4 files changed, 52 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dec44f4..f041df2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +## [1.0.3] - xxxx-xx-xx + +### Changed +- Faster `novae.spatial_neighbors` () + ## [1.0.2] - 2026-01-15 ### Added diff --git a/novae/utils/build.py b/novae/utils/build.py index 6f59c8a..abb70e9 100644 --- a/novae/utils/build.py +++ b/novae/utils/build.py @@ -8,7 +8,6 @@ from collections.abc import Iterable from enum import Enum from functools import partial -from itertools import chain from typing import Literal, get_args import numpy as np @@ -17,7 +16,6 @@ from anndata.utils import make_index_unique from scipy.sparse import SparseEfficiencyWarning, block_diag, csr_matrix, spmatrix from scipy.spatial import Delaunay -from sklearn.metrics.pairwise import euclidean_distances from sklearn.neighbors import NearestNeighbors from .._constants import Keys, Nums @@ -247,14 +245,9 @@ def _build_connectivity( Adj = csr_matrix((np.ones_like(indices, dtype=np.float64), indices, indptr), shape=(N, N)) if return_distance: - # fmt: off - dists = np.array(list(chain(*( - euclidean_distances(coords[indices[indptr[i] : indptr[i + 1]], :], coords[np.newaxis, i, :]) - for i in range(N) - if len(indices[indptr[i] : indptr[i + 1]]) - )))).squeeze() - Dst = csr_matrix((dists, indices, indptr), shape=(N, N)) - # fmt: on + rows, cols = Adj.nonzero() + p1, p2 = coords[rows], coords[cols] + Dst = csr_matrix((np.linalg.norm(p1 - p2, axis=1), (rows, cols)), shape=Adj.shape) else: r = 1 if radius is None else radius if isinstance(radius, (int, float)) else max(radius) tree = NearestNeighbors(n_neighbors=n_neighs, radius=r, metric="euclidean") diff --git a/tests/test_neighbors.py b/tests/test_neighbors.py index 6ed021c..70ebc25 100644 --- a/tests/test_neighbors.py +++ b/tests/test_neighbors.py @@ -1,8 +1,13 @@ +from itertools import chain + import anndata import numpy as np import pandas as pd import pytest from anndata import AnnData +from scipy.sparse import csr_matrix +from scipy.spatial import Delaunay +from sklearn.metrics import euclidean_distances import novae from novae._constants import Keys @@ -284,3 +289,35 @@ def test_change_n_hops(): assert mean_adj_view2 / mean_adj_view > 1.5 assert mean_adj_local2 / mean_adj_local > 1.5 + + +def test_new_distance_calculation() -> None: + coords = np.random.rand(40, 2) + + N = coords.shape[0] + + tri = Delaunay(coords) + indptr, indices = tri.vertex_neighbor_vertices + Adj = csr_matrix((np.ones_like(indices, dtype=np.float64), indices, indptr), shape=(N, N)) + + dists = np.array( + list( + chain( + *( + euclidean_distances(coords[indices[indptr[i] : indptr[i + 1]], :], coords[np.newaxis, i, :]) + for i in range(N) + if len(indices[indptr[i] : indptr[i + 1]]) + ) + ) + ) + ).squeeze() + Dst = csr_matrix((dists, indices, indptr), shape=(N, N)) + + rows, cols = Adj.nonzero() + p1, p2 = coords[rows], coords[cols] + Dst2 = csr_matrix((np.linalg.norm(p1 - p2, axis=1), (rows, cols)), shape=Adj.shape) + + Dst.sort_indices() + Dst2.sort_indices() + + assert np.allclose(Dst.data, Dst2.data) diff --git a/uv.lock b/uv.lock index 187b911..770548f 100644 --- a/uv.lock +++ b/uv.lock @@ -2521,7 +2521,7 @@ wheels = [ [[package]] name = "novae" -version = "1.0.1" +version = "1.0.2" source = { editable = "." } dependencies = [ { name = "fast-array-utils" }, @@ -2771,7 +2771,7 @@ name = "nvidia-cudnn-cu12" version = "9.10.2.21" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12" }, + { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, @@ -2782,7 +2782,7 @@ name = "nvidia-cufft-cu12" version = "11.3.3.83" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, @@ -2809,9 +2809,9 @@ name = "nvidia-cusolver-cu12" version = "11.7.3.90" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12" }, - { name = "nvidia-cusparse-cu12" }, - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cusparse-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, @@ -2822,7 +2822,7 @@ name = "nvidia-cusparse-cu12" version = "12.5.8.93" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, From b5c3e37b7e2b8f742bf5365e69bea8a667e752d4 Mon Sep 17 00:00:00 2001 From: Quentin Blampey Date: Tue, 10 Feb 2026 12:27:39 +0100 Subject: [PATCH 2/3] update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f041df2..9ccf37d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ ## [1.0.3] - xxxx-xx-xx ### Changed -- Faster `novae.spatial_neighbors` () +- Make `novae.spatial_neighbors` faster (#40) ## [1.0.2] - 2026-01-15 From 8b2efe0f6f3aadcaa64f85b1c77c6125b09ac959 Mon Sep 17 00:00:00 2001 From: Quentin Blampey Date: Tue, 10 Feb 2026 12:52:47 +0100 Subject: [PATCH 3/3] fix tests --- novae/utils/build.py | 6 +++--- tests/test_neighbors.py | 11 +++++------ 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/novae/utils/build.py b/novae/utils/build.py index abb70e9..6ed6736 100644 --- a/novae/utils/build.py +++ b/novae/utils/build.py @@ -245,9 +245,9 @@ def _build_connectivity( Adj = csr_matrix((np.ones_like(indices, dtype=np.float64), indices, indptr), shape=(N, N)) if return_distance: - rows, cols = Adj.nonzero() - p1, p2 = coords[rows], coords[cols] - Dst = csr_matrix((np.linalg.norm(p1 - p2, axis=1), (rows, cols)), shape=Adj.shape) + rows = np.repeat(np.arange(N), np.diff(indptr)) + dists = np.linalg.norm(coords[rows] - coords[indices], axis=1) + Dst = csr_matrix((dists, indices, indptr), shape=(N, N)) else: r = 1 if radius is None else radius if isinstance(radius, (int, float)) else max(radius) tree = NearestNeighbors(n_neighbors=n_neighs, radius=r, metric="euclidean") diff --git a/tests/test_neighbors.py b/tests/test_neighbors.py index 70ebc25..69ebf74 100644 --- a/tests/test_neighbors.py +++ b/tests/test_neighbors.py @@ -298,7 +298,6 @@ def test_new_distance_calculation() -> None: tri = Delaunay(coords) indptr, indices = tri.vertex_neighbor_vertices - Adj = csr_matrix((np.ones_like(indices, dtype=np.float64), indices, indptr), shape=(N, N)) dists = np.array( list( @@ -313,11 +312,11 @@ def test_new_distance_calculation() -> None: ).squeeze() Dst = csr_matrix((dists, indices, indptr), shape=(N, N)) - rows, cols = Adj.nonzero() - p1, p2 = coords[rows], coords[cols] - Dst2 = csr_matrix((np.linalg.norm(p1 - p2, axis=1), (rows, cols)), shape=Adj.shape) + rows = np.repeat(np.arange(N), np.diff(indptr)) + dists = np.linalg.norm(coords[rows] - coords[indices], axis=1) + Dst2 = csr_matrix((dists, indices, indptr), shape=(N, N)) - Dst.sort_indices() - Dst2.sort_indices() + assert (Dst.indices == Dst2.indices).all() + assert (Dst.indptr == Dst2.indptr).all() assert np.allclose(Dst.data, Dst2.data)