-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathmsa2json
More file actions
executable file
·55 lines (38 loc) · 1.58 KB
/
msa2json
File metadata and controls
executable file
·55 lines (38 loc) · 1.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env python3
import sys, json, signal
from argparse import ArgumentParser, FileType
from Bio import AlignIO
from BioExt.graphing import count_alignment
def main(args=None):
    """Write per-column letter counts of a FASTA alignment as JSON.

    Reads a single alignment in FASTA format from ``input`` (default:
    stdin), tallies every column with ``count_alignment`` and dumps one JSON
    object to ``output`` (default: stdout).  The object maps each 1-based
    column number to a ``{letter: count}`` object containing only the letters
    whose count in that column is greater than zero; columns left with no
    letters are omitted entirely.

    With ``-c/--counts`` a compiled regex matching ``:<digits>`` at the end
    of a string is handed to ``count_alignment`` as ``embedded_counts``
    (presumably to read per-sequence copy numbers from sequence names --
    confirm against ``BioExt.graphing.count_alignment``).

    Args:
        args: Command-line arguments without the program name.  Defaults to
            ``sys.argv[1:]`` when ``None``.

    Returns:
        ``0`` on success, suitable for passing to ``sys.exit``.
    """
    if args is None:
        args = sys.argv[1:]

    # Restore the default SIGPIPE action so a closed downstream pipe ends the
    # process instead of surfacing as a Python exception.
    try:
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)
    except (AttributeError, ValueError):
        # AttributeError: the platform has no SIGPIPE (e.g. Windows).
        # ValueError: signal.signal() was called outside the main thread.
        pass

    parser = ArgumentParser(
        description='convert a FASTA alignment into per-column letter counts (JSON)'
    )
    # NOTE(review): --frame is parsed but never read below; it is kept so
    # that existing command lines which pass it continue to work.
    parser.add_argument('-f', '--frame', type=int, choices=range(3), default=0)
    parser.add_argument('-c', '--counts', action='store_true')
    parser.add_argument('input', nargs='?', type=FileType('r'), default=sys.stdin)
    parser.add_argument('output', nargs='?', type=FileType('w'), default=sys.stdout)

    ns = parser.parse_args(args)

    try:
        alignment = AlignIO.read(ns.input, 'fasta')

        # The first record's length determines which columns are counted.
        first_record = next(iter(alignment))

        matcher = None
        if ns.counts:
            import re
            # Raw string: '\:' is an invalid escape in a regular literal.
            matcher = re.compile(r'\:([0-9]+)$')

        counts, alphabet = count_alignment(
            alignment,
            columns=list(range(len(first_record.seq))),
            limit=1000000,
            embedded_counts=matcher,
        )

        # counts is indexed as counts[letter_index][column_index]; the letter
        # for each letter_index is looked up in alphabet[0].
        alphabet_letters = alphabet[0]
        amino_acid_counts = {}

        for k in range(len(counts[0])):
            observed = {
                letter: counts[row][k]
                for row, letter in enumerate(alphabet_letters)
                if counts[row][k] > 0.0
            }
            # Columns with no positive count are left out of the output.
            if observed:
                amino_acid_counts[k + 1] = observed

        json.dump(amino_acid_counts, ns.output)
    finally:
        # Close files that FileType opened, but never the process-wide
        # standard streams, which the caller may still need.
        for handle in (ns.input, ns.output):
            if handle is not sys.stdin and handle is not sys.stdout:
                handle.close()

    return 0
# Script entry point: run main() and hand its return value to the interpreter
# as the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())