diff --git a/python/CHANGELOG.rst b/python/CHANGELOG.rst index bc9b6217da..9e577eb79d 100644 --- a/python/CHANGELOG.rst +++ b/python/CHANGELOG.rst @@ -11,7 +11,8 @@ In development - Add ``TreeSequence.ld_matrix`` stats method and documentation, for computing two-locus statistics in site and branch mode. - (:user:`lkirk`, :user:`apragsdale`, :pr:`3416`) + (:user:`lkirk`, :user:`apragsdale`, :pr:`3416`) +- Add ``node_labels`` parameter to ``write_nexus``. (:user:`kaathewisegit`, :pr:`3442`) -------------------- [1.0.2] - 2026-03-06 diff --git a/python/tests/test_phylo_formats.py b/python/tests/test_phylo_formats.py index 7736712089..ca336d1a6a 100644 --- a/python/tests/test_phylo_formats.py +++ b/python/tests/test_phylo_formats.py @@ -26,6 +26,7 @@ import functools import io +import random import textwrap import dendropy @@ -334,6 +335,85 @@ def test_nexus_no_trees_or_alignments(self): ) +class TestNexusNodeLabels: + @tests.cached_example + def balanced_tree(self): + # 4 + # ┏━┻┓ + # ┃ 3 + # ┃ ┏┻┓ + # 0 1 2 + return tskit.Tree.generate_balanced(3) + + def test_as_nexus_labels_basic(self): + ts = self.balanced_tree().tree_sequence + labels = {0: "human", 1: "chimp", 2: "bonobo"} + expected = textwrap.dedent( + """\ + #NEXUS + BEGIN TAXA; + DIMENSIONS NTAX=3; + TAXLABELS human chimp bonobo; + END; + BEGIN TREES; + TRANSLATE n0 human, n1 chimp, n2 bonobo; + TREE t0^1 = [&R] (n0:2,(n1:1,n2:1):1); + END; + """ + ) + assert expected == ts.as_nexus(include_alignments=False, node_labels=labels) + + def test_as_nexus_labels_partial(self): + ts = self.balanced_tree().tree_sequence + labels = {0: "human", 2: "bonobo"} + expected = textwrap.dedent( + """\ + #NEXUS + BEGIN TAXA; + DIMENSIONS NTAX=3; + TAXLABELS human n1 bonobo; + END; + BEGIN TREES; + TRANSLATE n0 human, n2 bonobo; + TREE t0^1 = [&R] (n0:2,(n1:1,n2:1):1); + END; + """ + ) + assert expected == ts.as_nexus(include_alignments=False, node_labels=labels) + + def test_as_nexus_labels_none(self): + ts = 
self.balanced_tree().tree_sequence + expected = textwrap.dedent( + """\ + #NEXUS + BEGIN TAXA; + DIMENSIONS NTAX=3; + TAXLABELS n0 n1 n2; + END; + BEGIN TREES; + TREE t0^1 = [&R] (n0:2,(n1:1,n2:1):1); + END; + """ + ) + assert expected == ts.as_nexus(include_alignments=False, node_labels=None) + + @pytest.mark.parametrize("ts", get_example_tree_sequences()) + def test_parseable(self, ts): + for tree in ts.trees(): + if not tree.has_single_root: + return + + labels = {} + samples = ts.samples() + k = random.randint(1, len(samples)) + for node in random.sample(list(samples), k): + labels[node] = f"new_node_which_was_{node}" + + nexus = ts.as_nexus(include_alignments=False, node_labels=labels) + print(nexus) + dendropy.DataSet.get(data=nexus, schema="nexus") + + class TestNewickCodePaths: """ Test that the different code paths we use under the hood lead to diff --git a/python/tskit/text_formats.py b/python/tskit/text_formats.py index d8a89d0be6..9898e6262a 100644 --- a/python/tskit/text_formats.py +++ b/python/tskit/text_formats.py @@ -120,6 +120,7 @@ def write_nexus( include_alignments, reference_sequence, missing_data_character, + node_labels, isolated_as_missing=None, ): # See TreeSequence.write_nexus for documentation on parameters. 
@@ -134,7 +135,13 @@ def write_nexus( print("#NEXUS", file=out) print("BEGIN TAXA;", file=out) print("", f"DIMENSIONS NTAX={ts.num_samples};", sep=indent, file=out) - taxlabels = " ".join(f"n{u}" for u in ts.samples()) + + if node_labels is not None: + taxlabels = " ".join( + node_labels[u] if u in node_labels else f"n{u}" for u in ts.samples() + ) + else: + taxlabels = " ".join(f"n{u}" for u in ts.samples()) print("", f"TAXLABELS {taxlabels};", sep=indent, file=out) print("END;", file=out) @@ -166,6 +173,11 @@ def write_nexus( include_trees = True if include_trees is None else include_trees if include_trees: print("BEGIN TREES;", file=out) + + if node_labels is not None: + translations = ", ".join(f"n{u} {name}" for u, name in node_labels.items()) + print(f" TRANSLATE {translations};", file=out) + for tree in ts.trees(): start_interval = "{0:.{1}f}".format(tree.interval.left, pos_precision) end_interval = "{0:.{1}f}".format(tree.interval.right, pos_precision) diff --git a/python/tskit/trees.py b/python/tskit/trees.py index a370daf17f..1f79440e81 100644 --- a/python/tskit/trees.py +++ b/python/tskit/trees.py @@ -6797,6 +6797,7 @@ def write_nexus( reference_sequence=None, missing_data_character=None, isolated_as_missing=None, + node_labels=None, ): """ Returns a `nexus encoding `_ @@ -6896,6 +6897,10 @@ def write_nexus( :param str missing_data_character: As for the :meth:`.alignments` method, but defaults to "?". :param bool isolated_as_missing: As for the :meth:`.alignments` method. + :param node_labels: A map of type ``{index: name}``. Samples present in + the map will have the given name instead of ``n{index}``. Note that + the names must not have whitespace (spaces should be replaced by + underscores) or punctuation in them. 
:return: A nexus representation of this :class:`TreeSequence` :rtype: str """ @@ -6908,6 +6913,7 @@ def write_nexus( reference_sequence=reference_sequence, missing_data_character=missing_data_character, isolated_as_missing=isolated_as_missing, + node_labels=node_labels, ) def as_nexus(self, **kwargs):