-
Notifications
You must be signed in to change notification settings - Fork 26
Expand file tree
/
Copy pathmg-compare-functions.py
More file actions
executable file
·176 lines (156 loc) · 8.06 KB
/
mg-compare-functions.py
File metadata and controls
executable file
·176 lines (156 loc) · 8.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
#!/usr/bin/env python
import os
import sys
import json
import copy
from argparse import ArgumentParser
from mglib import VERSION, AUTH_LIST, API_URL, get_auth_token, obj_from_url, urlencode, async_rest_api, biom_to_tab, merge_biom
prehelp = """
NAME
mg-compare-functions
VERSION
%s
SYNOPSIS
mg-compare-functions [ --help, --user <user>, --passwd <password>, --token <oAuth token>, --ids <metagenome ids>, --level <functional level>, --source <function datasource>, --filter_level <function level>, --filter_name <function name>, --intersect_source <taxon datasource>, --intersect_level <taxon level>, --intersect_name <taxon name>, --evalue <evalue negative exponent>, --identity <percent identity>, --length <alignment length>, --format <cv: 'text' or 'biom'> ]
DESCRIPTION
Retrieve matrix of functional abundance profiles for multiple metagenomes.
"""
posthelp = """
Output
1. Tab-delimited table of functional abundance profiles, metagenomes in columns and functions in rows.
2. BIOM format of functional abundance profiles.
EXAMPLES
mg-compare-functions --ids "mgm4441679.3,mgm4441680.3,mgm4441681.3,mgm4441682.3" --level level2 --source KO --format text --evalue 8
SEE ALSO
-
AUTHORS
%s
"""
def main(args):
ArgumentParser.format_description = lambda self, formatter: self.description
ArgumentParser.format_epilog = lambda self, formatter: self.epilog
parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
parser.add_argument("--ids", dest="ids", default=None, help="comma seperated list or file of KBase Metagenome IDs")
parser.add_argument("--url", dest="url", default=API_URL, help="communities API url")
parser.add_argument("--user", dest="user", default=None, help="OAuth username")
parser.add_argument("--passwd", dest="passwd", default=None, help="OAuth password")
parser.add_argument("--token", dest="token", default=None, help="OAuth token")
parser.add_argument("--level", dest="level", default='level3', help="functional level to retrieve abundances for, default is level3")
parser.add_argument("--source", dest="source", default='Subsystems', help="function datasource to filter results by, default is Subsystems")
parser.add_argument("--filter_level", dest="filter_level", default=None, help="function level to filter by")
parser.add_argument("--filter_name", dest="filter_name", default=None, help="function name to filter by, file or comma seperated list")
parser.add_argument("--intersect_source", dest="intersect_source", default='SEED', help="taxon datasource for insersection, default is SEED")
parser.add_argument("--intersect_level", dest="intersect_level", default=None, help="taxon level for insersection")
parser.add_argument("--intersect_name", dest="intersect_name", default=None, help="taxon name(s) for insersection, file or comma seperated list")
parser.add_argument("--output", dest="output", default='-', help="output: filename or stdout (-), default is stdout")
parser.add_argument("--format", dest="format", default='biom', help="output format: 'text' for tabbed table, 'biom' for BIOM format, default is biom")
parser.add_argument("--evalue", type=int, dest="evalue", default=15, help="negative exponent value for maximum e-value cutoff, default is 15")
parser.add_argument("--identity", type=int, dest="identity", default=60, help="percent value for minimum %% identity cutoff, default is 60")
parser.add_argument("--length", type=int, dest="length", default=15, help="value for minimum alignment length cutoff, default is 15")
parser.add_argument("--hierarchy", action="store_true", dest="hierarchy", help="Don't use id, show hierarchy")
parser.add_argument("--version", type=int, dest="version", default=1, help="M5NR annotation version to use, default is 1")
parser.add_argument("--temp", dest="temp", default=None, help="filename to temporarly save biom output at each iteration")
# get inputs
opts = parser.parse_args()
if not opts.ids:
sys.stderr.write("ERROR: one or more ids required\n")
return 1
if (opts.filter_name and (not opts.filter_level)) or ((not opts.filter_name) and opts.filter_level):
sys.stderr.write("ERROR: both --filter_level and --filter_name need to be used together\n")
return 1
if (opts.intersect_name and (not opts.intersect_level)) or ((not opts.intersect_name) and opts.intersect_level):
sys.stderr.write("ERROR: both --intersect_level and --intersect_name need to be used together\n")
return 1
if opts.format not in ['text', 'biom']:
sys.stderr.write("ERROR: invalid input format\n")
return 1
# get auth
token = get_auth_token(opts)
# build url
id_list = []
if os.path.isfile(opts.ids):
id_str = open(opts.ids, 'r').read()
try:
id_obj = json.loads(id_str)
if 'elements' in id_obj:
id_list = id_obj['elements'].keys()
elif 'members' in id_obj:
id_list = map(lambda x: x['ID'], id_obj['members'])
except:
id_list = id_str.strip().split('\n')
else:
id_list = opts.ids.strip().split(',')
params = [('group_level', opts.level),
('source', opts.source),
('evalue', opts.evalue),
('identity', opts.identity),
('length', opts.length),
('version', opts.version),
('result_type', 'abundance'),
('asynchronous', '1') ]
if opts.intersect_level and opts.intersect_name:
params.append(('filter_source', opts.intersect_source))
params.append(('filter_level', opts.intersect_level))
if os.path.isfile(opts.intersect_name):
with open(opts.intersect_name) as file_:
for f in file_:
params.append(('filter', f.strip()))
else:
for f in opts.intersect_name.strip().split(','):
params.append(('filter', f))
# retrieve data
biom = None
size = 50
if len(id_list) > size:
for i in range(0, len(id_list), size):
sub_ids = id_list[i:i+size]
cur_params = copy.deepcopy(params)
for i in sub_ids:
cur_params.append(('id', i))
cur_url = opts.url+'/matrix/function?'+urlencode(cur_params, True)
cur_biom = async_rest_api(cur_url, auth=token)
biom = merge_biom(biom, cur_biom)
if opts.temp:
json.dump(biom, open(opts.temp, 'w'))
else:
for i in id_list:
params.append(('id', i))
url = opts.url+'/matrix/function?'+urlencode(params, True)
biom = async_rest_api(url, auth=token)
if opts.temp:
json.dump(biom, open(opts.temp, 'w'))
# get sub annotations
sub_ann = set()
if opts.filter_name and opts.filter_level:
# get input filter list
filter_list = []
if os.path.isfile(opts.filter_name):
with open(opts.filter_name) as file_:
for f in file_:
filter_list.append(f.strip())
else:
for f in opts.filter_name.strip().split(','):
filter_list.append(f)
# annotation mapping from m5nr
params = [('version', opts.version),
('min_level', opts.level),
('source', opts.source) ]
url = opts.url+'/m5nr/ontology?'+urlencode(params, True)
data = obj_from_url(url)
level = 'level4' if opts.level == 'function' else opts.level
for ann in data['data']:
if (opts.filter_level in ann) and (level in ann) and (ann[opts.filter_level] in filter_list):
sub_ann.add(ann[level])
# output data
if (not opts.output) or (opts.output == '-'):
out_hdl = sys.stdout
else:
out_hdl = open(opts.output, 'w')
if opts.format == 'biom':
out_hdl.write(json.dumps(biom)+"\n")
else:
biom_to_tab(biom["data"], out_hdl, rows=sub_ann, use_id=not opts.hierarchy)
out_hdl.close()
return 0
if __name__ == "__main__":
sys.exit(main(sys.argv))