-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathebi_metagenomics
More file actions
153 lines (121 loc) · 4.05 KB
/
ebi_metagenomics
File metadata and controls
153 lines (121 loc) · 4.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#!/usr/bin/env python
import click
import ebisearch
from pprint import pprint
import json
import requests
import os
import pkg_resources
def get_data(filename):
    """Return the path of a resource resolved against `ebisearch_data`

    filename: name of the resource to look up with pkg_resources
    """
    resource_path = pkg_resources.resource_filename(
        'ebisearch_data',
        filename)
    return resource_path
# Parse the run-data description once, at import time; the functions of this
# module read it through the module-level name `run_data`.
with open(get_data('ebi_metagenomics_run_data.json'), 'r') as json_data:
    run_data = json.loads(json_data.read())
# Root click command group of this script. It does nothing by itself; the
# subcommands are attached to it with main.add_command() further down.
# NOTE(review): kept without a docstring on purpose - click would display a
# docstring as the group's help text, which would change the CLI output.
@click.group()
def main():
    pass
def get_pipeline_version(run):
    """Return the pipeline version recorded for a run

    Asks ebisearch for the "pipeline_version" field of the run in the
    "metagenomics_runs" domain and returns the first value found in the
    first returned entry.

    run: id of a run
    """
    query = {
        "domain": "metagenomics_runs",
        "entryids": run,
        "fields": "pipeline_version",
        "fieldurl": False,
        "viewurl": False,
    }
    first_entry = ebisearch.get_entries(**query)[0]
    return first_entry["fields"]["pipeline_version"][0]
def get_possible_run_data(run):
    """Return the entry of `run_data` matching the pipeline version of a run

    The pipeline version of the run is looked up first, then used as a key
    into run_data["pipeline_version"].

    run: id of a run
    """
    pipeline = get_pipeline_version(run)
    return run_data["pipeline_version"][pipeline]
@click.command(
    'print_possible_run_data',
    short_help='Get possible data for a run')
@click.option(
    '--run',
    required=True,
    help='Id of a run in EBI metagenomics')
def print_possible_run_data(run):
    """Print information about the possible data to download for a run"""
    # The docstring above doubles as the command's help text in click, so it
    # is kept as a single sentence.
    #
    # --run is required: without it `run` would be None and the lookup below
    # would fail deep inside the EBI query instead of with a usage message.
    #
    # Output layout, one block per section:
    #   <section label taken from run_data["sections"]>
    #   \t<data type>
    #   \t\t<detail>: <value>   (from run_data["description"][<data type>])
    possible_data = get_possible_run_data(run)
    for section in possible_data:
        print(run_data["sections"][section])
        for data_type in possible_data[section]:
            # One-element tuple so the value is always formatted as a whole,
            # whatever its type.
            print("\t%s" % (data_type,))
            details = run_data["description"][data_type]
            for detail, value in details.items():
                print("\t\t%s: %s" % (detail, value))
def check_data(run, data):
    """Return the section under which a data type is available for a run

    Every section of the possible data of the run is inspected; when several
    sections list the data type, the last one in iteration order is returned.

    run: id of a run
    data: type of data to check

    Raises ValueError when no section lists the data type.
    """
    available = get_possible_run_data(run)
    matching = [name for name in available if data in available[name]]
    if not matching:
        raise ValueError(
            "Data %s can not be downloaded for %s" % (data, run))
    return matching[-1]
def check_chunk_use(data, data_section):
    """Tell whether a type of data has to be fetched chunk by chunk

    Chunks are used for everything in the "sequences" section except
    "ncRNA-tRNA-FASTA", and for "InterProScan" in the "function" section.

    data: type of data to check
    data_section: section corresponding to the data
    """
    if data_section == "sequences":
        return data != "ncRNA-tRNA-FASTA"
    if data_section == "function":
        return data == "InterProScan"
    return False
def download(url, file, write_type):
    """Download from an URL and redirect the content into a file

    url: URL where the data are
    file: path to file where the data are written
    write_type: mode used to open the file, "wb" to overwrite it or "ab" to
        append to it

    Raises requests.HTTPError (through raise_for_status) when the server
    answers with an error status; the file is not opened in that case.
    """
    r = requests.get(url, stream=True)
    try:
        r.raise_for_status()
        with open(file, write_type) as fd:
            # 64 KiB blocks: same bytes on disk as with tiny blocks, but far
            # fewer iterations and write calls on large downloads.
            for chunk in r.iter_content(chunk_size=64 * 1024):
                fd.write(chunk)
    finally:
        # With stream=True the connection is only released once the body is
        # fully consumed or the response is closed, so close it explicitly to
        # cover the error paths (bad status, unwritable file, failed write).
        r.close()
@click.command('download_run_data', short_help='Download run data')
@click.option(
    '--run',
    required=True,
    help='Id of a run in EBI metagenomics')
@click.option(
    '--data',
    required=True,
    help='Data to download for the run '
         '(accessible with print_possible_run_data)')
@click.option(
    '--file',
    required=True,
    type=click.Path(dir_okay=False, writable=True),
    help='File to export downloaded data')
def download_run_data(run, data, file):
    """Download data for a run"""
    # The docstring above doubles as the command's help text in click, so it
    # is kept as a single sentence.
    #
    # All three options are required: a missing one used to reach the code
    # below as None and fail there instead of with a usage message. --file
    # must not be a directory since it is opened for writing.

    # Raises ValueError when the data type is not available for this run.
    data_section = check_data(run, data)

    # fieldurl=True makes the entry carry the URL the data are fetched from.
    run_details = ebisearch.get_entries(
        domain="metagenomics_runs",
        entryids=run,
        fields="pipeline_version",
        fieldurl=True,
        viewurl=False)
    run_url = run_details[0]['fieldURLs'][0]['value']
    url = "%s/%s/%s" % (run_url, data_section, data)

    if not check_chunk_use(data, data_section):
        download(url, file, "wb")
        return

    # Chunked data: <url>/chunks gives the number of chunks as JSON, and the
    # chunks themselves live at <url>/chunks/1 ... <url>/chunks/<number>.
    url += "/chunks"
    chunk_nb_r = requests.get(url, headers={"accept": "application/json"})
    chunk_nb_r.raise_for_status()
    chunk_nb = chunk_nb_r.json()
    for nb in range(1, chunk_nb + 1):
        # The first chunk overwrites the output file, like the non-chunked
        # branch does; only the following ones are appended. Appending every
        # chunk would stack the download on top of a pre-existing file.
        write_type = "wb" if nb == 1 else "ab"
        download("%s/%s" % (url, nb), file, write_type)
# Attach the two subcommands defined above to the `main` click group.
main.add_command(print_possible_run_data)
main.add_command(download_run_data)

# Run the command group only when the file is executed as a script.
if __name__ == "__main__":
    main()