-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgroup_chromosomes.py
More file actions
executable file
·54 lines (38 loc) · 1.25 KB
/
group_chromosomes.py
File metadata and controls
executable file
·54 lines (38 loc) · 1.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/env pypy3
"""
Script that groups input lines by chromosome.
This script reads lines from standard input, expecting each line to contain
at least two whitespace-separated fields. The first field is treated as the
chromosome name (key), and the second field as a sample associated with that chromosome.
The script collects all samples associated with each chromosome and outputs
one line per chromosome, in sorted chromosome-name order: the chromosome name,
a tab, and a space-separated list of its samples in the order they were read.
Input format (tab- or space-separated, at least two fields per line):
chrom sample
Example input:
chr1 sampleA
chr2 sampleB
chr1 sampleC
chr3 sampleD
Example output:
chr1 sampleA sampleC
chr2 sampleB
chr3 sampleD
Usage:
cat input.txt | group_chromosomes.py
"""
import sys
import signal
# Restore the default SIGPIPE disposition so that a closed downstream pipe
# (e.g. `group_chromosomes.py | head`) ends the script quietly instead of
# surfacing as a BrokenPipeError traceback when writing to stdout.
signal.signal(signal.SIGPIPE, signal.SIG_DFL)
from collections import defaultdict
def group_chromosomes(file):
    """Group samples by chromosome and print one line per chromosome.

    Each input line is split on runs of whitespace (tabs and/or spaces), so
    both tab- and space-separated input are accepted, matching the input
    format described in the module docstring. The first field is the
    chromosome name and the second is the sample; any further fields are
    ignored. Lines with fewer than two fields (including blank lines) are
    skipped.

    Args:
        file: Iterable of text lines, e.g. ``sys.stdin`` or an open file.

    Returns:
        None. Output goes to standard output: for each chromosome, in sorted
        name order, the name, a tab, and its samples joined by single spaces
        in the order they were read.
    """
    groups = defaultdict(list)
    for line in file:
        # split() with no argument splits on any whitespace run and discards
        # leading/trailing whitespace, so no separate strip() is needed.
        fields = line.split()
        if len(fields) < 2:
            continue
        chrom, sample = fields[0], fields[1]
        groups[chrom].append(sample)

    for chrom in sorted(groups):
        print(f"{chrom}\t{' '.join(groups[chrom])}")
# Run as a script: read lines from standard input and print the grouped
# result to standard output. Importing this module does not trigger this.
if __name__ == "__main__":
    group_chromosomes(sys.stdin)