Skip to content

Commit 2ab878b

Browse files
authored
Merge pull request #2 from SingleRust/feature-dev-export
Feature dev export
2 parents 2e00b32 + 893844d commit 2ab878b

8 files changed

Lines changed: 251 additions & 16 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "anndata-memory"
3-
version = "1.0.0"
3+
version = "1.0.1"
44
edition = "2021"
55
readme = "README.md"
66
repository = "https://github.com/SingleRust/Anndata-Memory"

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# AnnData-Memory
22

3-
![Version](https://img.shields.io/badge/version-1.0.0-blue)
3+
![Version](https://img.shields.io/badge/version-1.0.1-blue)
44
[![License](https://img.shields.io/badge/license-BSD--3--Clause-green)](LICENSE.md)
55

66
A high-performance, thread-safe, in-memory implementation of the AnnData data structure for the SingleRust ecosystem.
@@ -31,7 +31,7 @@ Add AnnData-Memory to your `Cargo.toml`:
3131

3232
```toml
3333
[dependencies]
34-
anndata-memory = "1.0.0"
34+
anndata-memory = "1.0.1"
3535
```
3636

3737
## Usage

src/ad/helpers.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ use anndata::{
1212
};
1313
use anyhow::bail;
1414

15-
use nalgebra_sparse::CsrMatrix;
1615
use ndarray::Array2;
1716
use polars::{
1817
frame::DataFrame,
@@ -751,4 +750,11 @@ impl IMElementCollection {
751750
.map(|element| element.deep_clone())
752751
.ok_or_else(|| anyhow::anyhow!("Key not found"))
753752
}
753+
754+
/// Returns an owned copy of every key currently stored in the collection.
///
/// The keys are cloned while a read guard on the inner map is held, so the
/// returned vector is independent of the collection afterwards.
///
/// # Errors
///
/// This implementation always returns `Ok`; the `Result` wrapper is kept so
/// the signature stays compatible with existing callers.
pub fn keys(&self) -> anyhow::Result<Vec<String>> {
    let read_guard = self.0.read_inner();
    // `cloned()` replaces the hand-written `map(|k| k.clone())` closure.
    let keys = read_guard.keys().cloned().collect();
    Ok(keys)
}
754760
}

src/converter.rs

Lines changed: 111 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,154 @@
1-
use std::ops::Deref;
1+
use std::ops::{Deref, DerefMut};
22

3-
use anndata::{AnnData, AnnDataOp, ArrayData, ArrayElemOp, AxisArrays, Backend, ElemCollection};
3+
use anndata::data::DataFrameIndex;
4+
use anndata::{
5+
AnnData, AnnDataOp, ArrayData, ArrayElemOp, AxisArrays, Backend, ElemCollection,
6+
ElemCollectionOp,
7+
};
8+
use anndata_hdf5::H5;
49
use anyhow::Ok;
510

6-
use crate::{ad::helpers::{IMElement, IMAxisArrays}, IMAnnData, IMArrayElement, IMElementCollection};
11+
use crate::{
12+
ad::helpers::{IMAxisArrays, IMElement},
13+
IMAnnData, IMArrayElement, IMElementCollection,
14+
};
715

816
/// Loads a backed [`AnnData`] object completely into an in-memory [`IMAnnData`].
///
/// `obs`, `var`, both name indices and `X` are read first and used to build
/// the in-memory object; afterwards `obsm`, `obsp`, `varm`, `varp`, `layers`
/// and `uns` are copied over. The backed object is consumed and closed once
/// everything has been copied.
///
/// # Errors
///
/// Returns an error if reading from the backend fails, if the backed object
/// has no `X` matrix, if the in-memory object cannot be constructed, if any
/// of the collections cannot be copied, or if closing the backed object fails.
pub fn convert_to_in_memory<B: Backend>(anndata: AnnData<B>) -> anyhow::Result<IMAnnData> {
    let obs_df = anndata.read_obs()?;
    let obs_names = anndata.obs_names();
    let var_df = anndata.read_var()?;
    let var_names = anndata.var_names();
    // A missing `X` is reported as an error rather than panicking via `unwrap()`.
    let x = anndata
        .x()
        .get::<ArrayData>()?
        .ok_or_else(|| anyhow::anyhow!("AnnData object has no X matrix"))?;
    let imad = IMAnnData::new_extended(
        x,
        obs_names.into_vec(),
        var_names.into_vec(),
        obs_df,
        var_df,
    )?;
    convert_axis_arrays_to_mem(anndata.obsm(), imad.obsm())?;
    convert_axis_arrays_to_mem(anndata.obsp(), imad.obsp())?;
    convert_axis_arrays_to_mem(anndata.varm(), imad.varm())?;
    convert_axis_arrays_to_mem(anndata.varp(), imad.varp())?;
    convert_axis_arrays_to_mem(anndata.layers(), imad.layers())?;
    convert_uns_to_mem(anndata.uns(), imad.uns())?;
    anndata.close()?;
    Ok(imad)
}
2338

24-
fn convert_axis_arrays_to_mem<B: Backend>(axis_arr: &AxisArrays<B>, reference_element: IMAxisArrays) -> anyhow::Result<()> {
39+
fn convert_axis_arrays_to_mem<B: Backend>(
40+
axis_arr: &AxisArrays<B>,
41+
reference_element: IMAxisArrays,
42+
) -> anyhow::Result<()> {
2543
if axis_arr.is_none() {
2644
return Ok(());
2745
}
2846
let x = axis_arr.inner();
2947
let iax = x.deref();
3048
let data = iax.deref();
31-
for (k,v) in data.iter() {
49+
for (k, v) in data.iter() {
3250
let arr = v.get::<ArrayData>()?.unwrap();
3351
let im_arr = IMArrayElement::new(arr);
3452
reference_element.add_array(k.to_string(), im_arr)?;
3553
}
3654
Ok(())
3755
}
3856

39-
fn convert_uns_to_mem<B: Backend>(elem_col: &ElemCollection<B>, reference_element: IMElementCollection) -> anyhow::Result<()> {
57+
fn convert_uns_to_mem<B: Backend>(
58+
elem_col: &ElemCollection<B>,
59+
reference_element: IMElementCollection,
60+
) -> anyhow::Result<()> {
4061
if elem_col.is_none() {
4162
return Ok(());
4263
}
4364
let x = elem_col.inner();
4465
let iax = x.deref();
4566
let data = iax.deref();
46-
for (k,v) in data.iter() {
67+
for (k, v) in data.iter() {
4768
let data = v.inner().data();
4869
let d = IMElement::new(data?);
4970
reference_element.add_data(k.to_string(), d)?;
5071
}
5172
Ok(())
5273
}
74+
75+
fn convert_axis_arrays_to_backed<B: Backend>(
76+
reference: IMAxisArrays,
77+
target_axis_arrays: &AxisArrays<B>,
78+
) -> anyhow::Result<()> {
79+
if reference.is_empty() {
80+
return Ok(());
81+
}
82+
83+
for key in reference.keys() {
84+
let value = reference.get_array(&key)?;
85+
let array_data = value.get_data()?;
86+
let mut guard = target_axis_arrays.lock();
87+
let data = guard.deref_mut();
88+
match (data) {
89+
None => {}
90+
Some(data) => data.add_data(&key, array_data)?,
91+
};
92+
}
93+
94+
Ok(())
95+
}
96+
97+
fn convert_uns_to_backed<B: Backend>(
98+
reference: IMElementCollection,
99+
target_file: &AnnData<B>,
100+
) -> anyhow::Result<()> {
101+
let uns = target_file.uns();
102+
103+
let keys = {
104+
let read_guard = reference.0.read_inner();
105+
read_guard.keys().cloned().collect::<Vec<String>>()
106+
};
107+
108+
for key in keys {
109+
let val = reference.get_data(&key)?;
110+
{
111+
let elem_data = val.get_data()?;
112+
113+
// Lock the uns structure and update it
114+
let mut guard = uns.lock();
115+
let data = guard.deref_mut();
116+
if let Some(data) = data {
117+
data.add_data(&key, elem_data)?;
118+
}
119+
}
120+
}
121+
Ok(())
122+
}
123+
124+
/// Writes the contents of an in-memory [`IMAnnData`] into an existing backed
/// [`AnnData`] object.
///
/// The write order is: `X`, `obs`, obs names, `var`, var names, then `obsm`,
/// `obsp`, `varm`, `varp`, `layers` and finally `uns`.
///
/// # Errors
///
/// Returns the first error produced while reading from `imad` or writing to
/// `anndata`; parts written before the failure are not rolled back here.
pub fn convert_to_backed<B: Backend>(imad: &IMAnnData, anndata: &AnnData<B>) -> anyhow::Result<()> {
    // Main matrix.
    anndata.set_x(imad.x().get_data()?)?;

    // Observation annotations and their index.
    let obs_frame = imad.obs().get_data();
    anndata.set_obs(obs_frame)?;
    let obs_index = DataFrameIndex::from(imad.obs_names());
    anndata.set_obs_names(obs_index)?;

    // Variable annotations and their index.
    let var_frame = imad.var().get_data();
    anndata.set_var(var_frame)?;
    let var_index = DataFrameIndex::from(imad.var_names());
    anndata.set_var_names(var_index)?;

    // Axis-aligned array collections.
    convert_axis_arrays_to_backed(imad.obsm(), anndata.obsm())?;
    convert_axis_arrays_to_backed(imad.obsp(), anndata.obsp())?;
    convert_axis_arrays_to_backed(imad.varm(), anndata.varm())?;
    convert_axis_arrays_to_backed(imad.varp(), anndata.varp())?;
    convert_axis_arrays_to_backed(imad.layers(), anndata.layers())?;

    // Unstructured annotations go last; their result is the function result.
    convert_uns_to_backed(imad.uns(), anndata)
}
144+
145+
/// Creates a new HDF5-backed [`AnnData`] at `path` and fills it with the
/// contents of `imad`.
///
/// # Errors
///
/// Returns an error if the backed object cannot be created at `path` or if
/// [`convert_to_backed`] fails while copying the data.
pub fn convert_to_new_backed_h5(
    imad: &IMAnnData,
    path: impl AsRef<std::path::Path>,
) -> anyhow::Result<AnnData<H5>> {
    let backed = AnnData::<H5>::new(path)?;
    // Hand the freshly created object back only if the copy succeeded.
    convert_to_backed(imad, &backed).map(|()| backed)
}

src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,6 @@ pub use ad::helpers::IMElementCollection;
1010
pub use ad::helpers::IMElement;
1111
pub use ad::helpers::IMAxisArrays;
1212
pub use converter::convert_to_in_memory;
13+
pub use converter::convert_to_backed;
14+
pub use converter::convert_to_new_backed_h5;
1315
pub use base::DeepClone;

tests/test_conversion.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ use anndata::{
44
use anndata_hdf5::H5;
55
use anndata_memory::*;
66

7-
#[test]
8-
//#[allow(dead_code)]
7+
//#[test]
8+
#[allow(dead_code)]
99
fn test_convert_anndata_to_imanndata_filter() -> anyhow::Result<()> {
1010
let h5_file = H5::open("/local/bachelor_thesis_ian/single_bench/data/merged_test.h5ad")?;
1111
let anndata = AnnData::<H5>::open(h5_file)?;
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
use anndata::{AnnData, AnnDataOp, ArrayData, AxisArraysOp, Backend};
2+
use anndata_hdf5::H5;
3+
use anndata_memory::*;
4+
use nalgebra_sparse::{CooMatrix, CsrMatrix};
5+
use ndarray::Array2;
6+
use polars::prelude::*;
7+
use rand::rngs::StdRng;
8+
use rand::{Rng, SeedableRng};
9+
use tempfile::tempdir;
10+
11+
#[test]
12+
fn test_round_trip_conversion() -> anyhow::Result<()> {
13+
let n_rows = 10;
14+
let n_cols = 20;
15+
let density = 0.3;
16+
let nnz = (n_rows * n_cols) as f64 * density;
17+
let mut rng = StdRng::seed_from_u64(42);
18+
19+
let mut coo_matrix = CooMatrix::new(n_rows, n_cols);
20+
21+
for _ in 0..(nnz as usize) {
22+
let row = rng.gen_range(0..n_rows);
23+
let col = rng.gen_range(0..n_cols);
24+
let value = rng.gen::<f64>();
25+
coo_matrix.push(row, col, value);
26+
}
27+
28+
let csr_matrix: CsrMatrix<f64> = (&coo_matrix).into();
29+
30+
let im_array = IMArrayElement::new(ArrayData::from(csr_matrix));
31+
32+
let obs_names: Vec<String> = (0..n_rows).map(|i| format!("cell_{}", i)).collect();
33+
34+
let cell_types = vec!["T cell", "B cell", "NK cell", "Monocyte", "Dendritic"];
35+
36+
let obs_cell_types: Vec<&str> = (0..n_rows)
37+
.map(|i| cell_types[i % cell_types.len()])
38+
.collect();
39+
40+
let obs_df = DataFrame::new(vec![Column::from(Series::new(
41+
PlSmallStr::from("cell_type"),
42+
obs_cell_types,
43+
))])?;
44+
let obs_element = IMDataFrameElement::new(obs_df, obs_names.clone().into());
45+
46+
let var_names: Vec<String> = (0..n_cols).map(|i| format!("gene_{}", i)).collect();
47+
let gene_types = vec!["protein_coding", "lincRNA", "pseudogene", "miRNA"];
48+
let var_gene_types: Vec<&str> = (0..n_cols)
49+
.map(|i| gene_types[i % gene_types.len()])
50+
.collect();
51+
52+
let var_df = DataFrame::new(vec![Column::from(Series::new(
53+
PlSmallStr::from("gene_type"),
54+
var_gene_types,
55+
))])?;
56+
let var_element = IMDataFrameElement::new(var_df, var_names.clone().into());
57+
58+
let mut im_anndata = IMAnnData::new(im_array, obs_element, var_element)?;
59+
60+
let mut coo_norm = CooMatrix::new(n_rows, n_cols);
61+
62+
for _ in 0..(nnz as usize) {
63+
let row = rng.gen_range(0..n_rows);
64+
let col = rng.gen_range(0..n_cols);
65+
let value = rng.gen::<f64>() * 0.1; // Smaller values
66+
coo_norm.push(row, col, value);
67+
}
68+
69+
let csr_norm: CsrMatrix<f64> = CsrMatrix::from(&coo_norm);
70+
71+
let layer_im_array = IMArrayElement::new(ArrayData::from(csr_norm));
72+
im_anndata.add_layer("normalized".to_string(), layer_im_array)?;
73+
74+
let n_components = 2;
75+
let pca_data: Array2<f64> = Array2::from_shape_fn((n_rows, n_components), |_| rng.gen());
76+
let pca_array_data = ArrayData::from(pca_data);
77+
let pca_im_array = IMArrayElement::new(pca_array_data);
78+
im_anndata
79+
.obsm()
80+
.add_array("X_pca".to_string(), pca_im_array)?;
81+
82+
let temp_dir = tempdir()?;
83+
let h5_path = temp_dir.path().join("test_anndata.h5ad");
84+
let h5_anndata = convert_to_new_backed_h5(&im_anndata, &h5_path)?;
85+
86+
assert_eq!(h5_anndata.n_obs(), n_rows);
87+
assert_eq!(h5_anndata.n_vars(), n_cols);
88+
89+
let h5_obs_names = h5_anndata.obs_names().into_vec();
90+
assert_eq!(h5_obs_names, obs_names);
91+
92+
let h5_var_names = h5_anndata.var_names().into_vec();
93+
assert_eq!(h5_var_names, var_names);
94+
95+
assert!(h5_anndata
96+
.layers()
97+
.keys()
98+
.contains(&"normalized".to_string()));
99+
100+
assert!(h5_anndata.obsm().keys().contains(&"X_pca".to_string()));
101+
102+
h5_anndata.close()?;
103+
104+
// Now test reading back from H5 to in-memory
105+
let h5_file = H5::open(&h5_path)?;
106+
let reopened_anndata = AnnData::<H5>::open(h5_file)?;
107+
let im_anndata2 = convert_to_in_memory(reopened_anndata)?;
108+
109+
println!("{:?}", im_anndata2
110+
.layers()
111+
.keys());
112+
113+
// Verify round-trip conversion
114+
assert_eq!(im_anndata2.n_obs(), n_rows);
115+
assert_eq!(im_anndata2.n_vars(), n_cols);
116+
assert_eq!(im_anndata2.obs_names(), obs_names);
117+
assert_eq!(im_anndata2.var_names(), var_names);
118+
assert!(im_anndata2
119+
.layers()
120+
.keys()
121+
.contains(&"normalized".to_string()));
122+
assert!(im_anndata2.obsm().keys().contains(&"X_pca".to_string()));
123+
124+
Ok(())
125+
}

0 commit comments

Comments
 (0)