Skip to content

Commit 466441e

Browse files
authored
Merge pull request #70 from rbaldwin-bugseq/add_binding_for_ncbi_output
Added a binding for saving a Taxonomy in NCBI format
2 parents adad918 + 79a5eb9 commit 466441e

3 files changed

Lines changed: 69 additions & 6 deletions

File tree

src/formats/ncbi.rs

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -95,18 +95,24 @@ where
9595
let mut node_writer = BufWriter::new(std::fs::File::create(dir.join(NODES_FILENAME))?);
9696
let mut name_writer = BufWriter::new(std::fs::File::create(dir.join(NAMES_FILENAME))?);
9797

98-
for key in tax.traverse(tax.root())?.filter(|x| x.1).map(|x| x.0) {
98+
let root = tax.root();
99+
for key in tax.traverse(root.clone())?.filter(|x| x.1).map(|x| x.0) {
99100
let name = tax.name(key.clone())?;
100101
let rank = tax.rank(key.clone())?;
102+
let parent = if key == root {
103+
format!("{}", key)
104+
} else {
105+
tax.parent(key.clone())?
106+
.map(|(x, _)| format!("{}", x))
107+
.unwrap_or_default()
108+
};
101109
name_writer
102-
.write_all(format!("{}\t|\t{}\t|\tscientific name\t|", &key, name).as_bytes())?;
110+
.write_all(format!("{}\t|\t{}\t|\t\t|\tscientific name\t|\n", &key, name).as_bytes())?;
103111
node_writer.write_all(
104112
format!(
105-
"{}\t|\t{}\t|\t{}\t|\t\t|\t\t|\t\t|\t\t|\t\t|\t\t|\t\t|\t\t|\t\t|\t\t|",
113+
"{}\t|\t{}\t|\t{}\t|\t\t|\t\t|\t\t|\t\t|\t\t|\t\t|\t\t|\t\t|\t\t|\t\t|\n",
106114
&key,
107-
tax.parent(key.clone())?
108-
.map(|(x, _)| format!("{}", x))
109-
.unwrap_or_default(),
115+
parent,
110116
rank.to_ncbi_rank(),
111117
)
112118
.as_bytes(),
@@ -215,5 +221,42 @@ mod tests {
215221

216222
let out = path.join("out");
217223
save::<&str, _, _>(&tax, &out).unwrap();
224+
225+
// now load again and validate a few taxids
226+
let tax2 = load(&out).unwrap();
227+
228+
// Check E. coli (562)
229+
assert_eq!(
230+
Taxonomy::<&str>::name(&tax2, "562").unwrap(),
231+
"Escherichia coli"
232+
);
233+
assert_eq!(
234+
Taxonomy::<&str>::rank(&tax2, "562").unwrap(),
235+
TaxRank::Species
236+
);
237+
assert_eq!(
238+
Taxonomy::<&str>::parent(&tax2, "562").unwrap(),
239+
Some(("561", 1.))
240+
);
241+
242+
// Check Escherichia (561)
243+
assert_eq!(Taxonomy::<&str>::name(&tax2, "561").unwrap(), "Escherichia");
244+
assert_eq!(
245+
Taxonomy::<&str>::rank(&tax2, "561").unwrap(),
246+
TaxRank::Genus
247+
);
248+
assert_eq!(
249+
Taxonomy::<&str>::parent(&tax2, "561").unwrap(),
250+
Some(("543", 1.))
251+
);
252+
253+
// Check root (1)
254+
assert_eq!(Taxonomy::<&str>::name(&tax2, "1").unwrap(), "root");
255+
256+
// Check children relationship preserved
257+
assert_eq!(
258+
Taxonomy::<&str>::children(&tax2, "561").unwrap(),
259+
vec!["562"]
260+
);
218261
}
219262
}

src/python.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,19 @@ impl Taxonomy {
263263
Ok(PyBytes::new(py, &bytes).into())
264264
}
265265

266+
/// to_ncbi(self, output_dir: str)
267+
/// --
268+
///
269+
/// Export a Taxonomy to NCBI format files (nodes.dmp and names.dmp).
270+
/// The output directory will be created if it doesn't exist.
271+
///
272+
/// Args:
273+
/// output_dir: Path to the directory where nodes.dmp and names.dmp will be written
274+
fn to_ncbi(&self, output_dir: &str) -> PyResult<()> {
275+
// Delegate to the NCBI writer with `&str` as the node-id type, passing the
// wrapped taxonomy and the caller-supplied directory straight through.
// NOTE(review): `py_try!` is defined outside this view; presumably it turns a
// Rust error from `ncbi::save` into a Python exception and returns early. Confirm.
// NOTE(review): the "directory will be created" promise in the docstring depends
// on `ncbi::save`, whose setup code is not visible here. Confirm it creates the dir.
py_try!(ncbi::save::<&str, _, _>(&self.tax, output_dir));
276+
// Nothing to hand back to Python on success.
Ok(())
277+
}
278+
266279
/// to_newick(self)
267280
/// --
268281
///

taxonomy.pyi

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,13 @@ class Taxonomy:
7575
"""Export a Taxonomy as a Newick-encoded byte string."""
7676
...
7777

78+
def to_ncbi(self, output_dir: str) -> None:
79+
"""
80+
Export a Taxonomy to NCBI format files (nodes.dmp and names.dmp).
81+
The output directory will be created if it doesn't exist.
82+

Args:
    output_dir: Path to the directory where nodes.dmp and names.dmp will be written.
"""
83+
...
84+
7885
def node(self, tax_id: str) -> Optional[TaxonomyNode]:
7986
"""Find a node by its id. Returns `None` if not found"""
8087
...

0 commit comments

Comments
 (0)