Skip to content

Commit 48956af

Browse files
Add pack_kmer_lossy and pack_kmer_u128_lossy functions that pack a slice of ASCII bases into its 2-bit-per-base u64 / u128 k-mer representation
1 parent 2d236ed commit 48956af

2 files changed

Lines changed: 22 additions & 3 deletions

File tree

src/lib.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -142,9 +142,9 @@ pub use ascii_seq::{AsciiSeq, AsciiSeqVec};
142142
pub use packed_n_seq::{PackedNSeq, PackedNSeqVec};
143143
pub use packed_seq::{BitSeq, BitSeqVec, PackedSeq, PackedSeqVec};
144144
pub use packed_seq::{
145-
complement_base, complement_base_simd, complement_char, pack_char, unpack_base, unpack_kmer,
146-
unpack_kmer_into_vec, unpack_kmer_to_vec, unpack_kmer_u128, unpack_kmer_u128_into_vec,
147-
unpack_kmer_u128_to_vec,
145+
complement_base, complement_base_simd, complement_char, pack_char, pack_kmer_lossy,
146+
pack_kmer_u128_lossy, unpack_base, unpack_kmer, unpack_kmer_into_vec, unpack_kmer_to_vec,
147+
unpack_kmer_u128, unpack_kmer_u128_into_vec, unpack_kmer_u128_to_vec,
148148
};
149149
pub use padded_it::{Advance, ChunkIt, PaddedIt};
150150
pub use traits::{Delay, Seq, SeqVec};

src/packed_seq.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,25 @@ pub fn pack_char(base: u8) -> u8 {
216216
pub fn pack_char_lossy(base: u8) -> u8 {
217217
(base >> 1) & 3
218218
}
219+
/// Pack a slice of ASCII `ACTGactg` characters into its packed 2-bit kmer representation.
220+
/// Other characters are silently converted.
221+
#[inline(always)]
222+
pub fn pack_kmer_lossy(slice: &[u8]) -> u64 {
223+
let mut kmer = 0;
224+
for (i, &base) in slice.iter().enumerate() {
225+
kmer |= (pack_char_lossy(base) as u64) << (2 * i);
226+
}
227+
kmer
228+
}
229+
/// Pack a slice of ASCII `ACTGactg` characters into its packed 2-bit kmer representation.
230+
#[inline(always)]
231+
pub fn pack_kmer_u128_lossy(slice: &[u8]) -> u128 {
232+
let mut kmer = 0;
233+
for (i, &base) in slice.iter().enumerate() {
234+
kmer |= (pack_char_lossy(base) as u128) << (2 * i);
235+
}
236+
kmer
237+
}
219238

220239
/// Unpack a 2-bit DNA base into the corresponding `ACTG` character.
221240
#[inline(always)]

0 commit comments

Comments
 (0)