Skip to content

Commit 0061f66

Browse files
author
Ian
committed
Version bump
- added pathway network struct in order to simplify computations with it
1 parent 32a83d6 commit 0061f66

5 files changed

Lines changed: 193 additions & 6 deletions

File tree

Cargo.lock

Lines changed: 8 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "single-utilities"
3-
version = "0.8.6"
3+
version = "0.8.7"
44
edition = "2024"
55
description = "This crate provides types, traits and utility functions to the single-rust ecosystem that can be universally used. You can also use it within your own ecosystem 👀"
66
homepage = "https://singlerust.com"
@@ -15,3 +15,4 @@ simd = ["dep:simba"]
1515
[dependencies]
1616
num-traits = "0.2.19"
1717
simba = { version = "0.9.1", optional = true }
18+
anyhow = "1.0.100"

src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,5 @@
6060
pub mod traits;
6161

6262
pub mod types;
63+
64+
pub(crate) mod utils;

src/types/mod.rs

Lines changed: 123 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
use crate::utils::validate_net;
2+
use std::collections::HashMap;
13
use std::hash::Hash;
24

35
/// Represents the direction of operations in matrix or array computations.
4-
///
6+
///
57
/// This enum is used to specify whether operations should be performed
68
/// along rows or columns of a data structure.
79
pub enum Direction {
@@ -22,7 +24,7 @@ impl Clone for Direction {
2224

2325
impl Direction {
2426
/// Checks if the direction is row-wise.
25-
///
27+
///
2628
/// # Returns
2729
/// `true` if the direction is `ROW`, `false` if it's `COLUMN`
2830
pub fn is_row(&self) -> bool {
@@ -34,7 +36,7 @@ impl Direction {
3436
}
3537

3638
/// A trait for types that can serve as batch identifiers.
37-
///
39+
///
3840
/// This trait is used to identify and group data in batch processing operations.
3941
/// Types implementing this trait must be cloneable, comparable for equality,
4042
/// and hashable for efficient lookup operations.
@@ -48,7 +50,7 @@ impl BatchIdentifier for u32 {}
4850
impl BatchIdentifier for usize {}
4951

5052
/// Enumeration of distance metrics for mathematical computations.
51-
///
53+
///
5254
/// This enum defines common distance metrics used in machine learning,
5355
/// clustering, and similarity calculations. Each variant represents
5456
/// a different approach to measuring the distance between points or vectors.
@@ -61,3 +63,120 @@ pub enum DistanceMetric {
6163
/// Cosine distance - measures the cosine of the angle between vectors
6264
Cosine,
6365
}
66+
67+
/// A collection of pathways stored in a flattened, contiguous layout.
///
/// All pathway members live back to back in `cnct` / `weights`. Pathway `i` owns the
/// half-open range `starts[i]..starts[i] + offsets[i]` of both vectors.
#[derive(Debug, Clone, PartialEq)]
pub struct PathwayNetwork {
    /// Name of each pathway; one entry per pathway.
    names: Vec<String>,
    /// Index into `cnct` / `weights` at which each pathway's members begin.
    starts: Vec<usize>,
    /// Number of members of each pathway (a length, despite the field name).
    offsets: Vec<usize>,
    /// Gene (feature) indices of all pathways, concatenated.
    cnct: Vec<usize>,
    /// Weight of each gene within its pathway; parallel to `cnct`.
    weights: Vec<f32>,
}
74+
75+
impl PathwayNetwork {
76+
pub fn new(
77+
names: Vec<String>,
78+
starts: Vec<usize>,
79+
offsets: Vec<usize>,
80+
cnct: Vec<usize>,
81+
weights: Vec<f32>,
82+
) -> Self {
83+
Self {
84+
names,
85+
starts,
86+
offsets,
87+
cnct,
88+
weights,
89+
}
90+
}
91+
92+
pub fn new_wo_weights(
93+
names: Vec<String>,
94+
starts: Vec<usize>,
95+
offsets: Vec<usize>,
96+
cnct: Vec<usize>,
97+
) -> Self {
98+
let weights = vec![1f32; cnct.len()];
99+
Self {
100+
names,
101+
starts,
102+
offsets,
103+
cnct,
104+
weights,
105+
}
106+
}
107+
108+
pub fn new_from_vec(
109+
sources: Vec<String>,
110+
targets: Vec<String>,
111+
weights: Option<Vec<f32>>,
112+
features: Vec<String>,
113+
tmin: u32,
114+
) -> Self {
115+
let res = validate_net(sources, targets, weights, false).unwrap();
116+
let tmin = tmin as usize;
117+
let filtered: HashMap<String, Vec<(String, f32)>> = res
118+
.into_iter()
119+
.filter_map(|(k, v)| if v.len() >= tmin { Some((k, v)) } else { None })
120+
.collect();
121+
122+
let name_to_id: HashMap<String, usize> = features
123+
.iter()
124+
.enumerate()
125+
.map(|(idx, name)| (name.clone(), idx))
126+
.collect();
127+
128+
let total_lengths = filtered.values().fold(0usize, |v, a| v + a.len());
129+
let num_pathways = filtered.len();
130+
131+
let mut names: Vec<String> = Vec::with_capacity(num_pathways);
132+
let mut starts: Vec<usize> = Vec::with_capacity(num_pathways);
133+
let mut offsets: Vec<usize> = Vec::with_capacity(num_pathways);
134+
let mut cnct: Vec<usize> = Vec::with_capacity(total_lengths);
135+
let mut weights_vec: Vec<f32> = Vec::with_capacity(total_lengths);
136+
137+
let mut i = 0usize;
138+
139+
for (k, v) in filtered.into_iter() {
140+
let len = v.len();
141+
142+
for (g_name, g_weight) in v {
143+
let g_idx = name_to_id.get(&g_name).unwrap();
144+
cnct.push(*g_idx);
145+
weights_vec.push(g_weight);
146+
}
147+
148+
names.push(k);
149+
starts.push(i);
150+
offsets.push(len);
151+
i += len;
152+
}
153+
154+
Self {
155+
names,
156+
starts,
157+
offsets,
158+
cnct,
159+
weights: weights_vec,
160+
}
161+
}
162+
163+
pub fn get_pathway_name(&self, idx: usize) -> &str {
164+
self.names[idx].as_str()
165+
}
166+
167+
pub fn get_pathway_features(&self, idx: usize) -> &[usize] {
168+
let srt = self.starts[idx];
169+
let off = srt + self.offsets[idx];
170+
&self.cnct[srt..off]
171+
}
172+
173+
pub fn get_pathway_features_and_weights(&self, idx: usize) -> (&[usize], &[f32]) {
174+
let srt = self.starts[idx];
175+
let off = srt + self.offsets[idx];
176+
(&self.cnct[srt..off], &self.weights[srt..off])
177+
}
178+
179+
pub fn get_num_pathways(&self) -> usize {
180+
self.names.len()
181+
}
182+
}

src/utils/mod.rs

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
use anyhow::anyhow;
2+
use std::collections::HashMap;
3+
4+
pub fn validate_net(
5+
source: Vec<String>,
6+
target: Vec<String>,
7+
weights: Option<Vec<f32>>,
8+
verbose: bool,
9+
) -> anyhow::Result<HashMap<String, Vec<(String, f32)>>> {
10+
let len_source = source.len();
11+
let len_target = target.len();
12+
if (len_source != len_target) {
13+
return Err(anyhow!(
14+
"Source and target must have the same length in order to be used for network construction!"
15+
));
16+
}
17+
18+
let mut map: HashMap<String, Vec<(String, f32)>> = HashMap::new();
19+
let mut current_src: String = "".to_string();
20+
let mut current_target_weight: HashMap<String, f32> = HashMap::new();
21+
for (i, src) in source.iter().enumerate() {
22+
if current_src.is_empty() {
23+
// never set a value in there
24+
current_src = src.clone();
25+
}
26+
27+
if current_src != *src {
28+
// incase this is a different node now
29+
if !current_target_weight.is_empty() {
30+
let data: Vec<(String, f32)> = current_target_weight
31+
.iter()
32+
.map(|(key, value)| (key.clone(), *value))
33+
.collect();
34+
map.insert(current_src, data);
35+
// cleanup
36+
current_target_weight.clear();
37+
current_src = src.clone();
38+
}
39+
}
40+
41+
let src_target = target[i].clone();
42+
let src_target_weight = match &weights {
43+
Some(we) => we[i],
44+
None => 1f32,
45+
};
46+
current_target_weight.insert(src_target, src_target_weight);
47+
}
48+
49+
if !current_target_weight.is_empty() {
50+
let data: Vec<(String, f32)> = current_target_weight
51+
.iter()
52+
.map(|(key, value)| (key.clone(), *value))
53+
.collect();
54+
map.insert(current_src, data);
55+
}
56+
57+
Ok(map)
58+
}

0 commit comments

Comments
 (0)