forked from WEHI-ResearchComputing/wehi-gdc
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsingle_file_download.py
More file actions
70 lines (58 loc) · 1.91 KB
/
single_file_download.py
File metadata and controls
70 lines (58 loc) · 1.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from helpers import GDCFileAuthProvider, GDCFileDownloader
from argparse import ArgumentParser
import multiprocessing as mp
import sys
def build_parser():
    """Create the argument parser for the download command line.

    Four string options are registered, each stored under the flag name with
    dashes turned into underscores:

    * ``--output-paths`` (required)
    * ``--file-ids`` (required)
    * ``--md5sums`` (optional)
    * ``--sizes`` (optional)

    Returns:
        The configured ``ArgumentParser``.
    """
    # One row per option, in registration order: (flag, help text, required).
    option_table = (
        ('--output-paths', 'comma output paths (filename) to download to', True),
        ('--file-ids', 'GDC file ids', True),
        ('--md5sums', 'expected md5 hashes', False),
        ('--sizes', 'expected file sizes', False),
    )

    cli = ArgumentParser()
    for flag, help_text, is_required in option_table:
        # The destination mirrors the flag, e.g. "--file-ids" -> "file_ids".
        attribute = flag.lstrip('-').replace('-', '_')
        cli.add_argument(flag,
                         dest=attribute,
                         help=help_text,
                         required=is_required)
    return cli
def main(argv):
    """Download the requested GDC files in parallel and exit with a status.

    The ``--output-paths``, ``--file-ids``, ``--md5sums`` and ``--sizes``
    options are comma-separated lists that are matched up by position. One
    ``GDCFileDownloader`` is created per file and submitted to a process pool
    with one worker per file.

    Args:
        argv: Command-line arguments, without the program name.

    Raises:
        SystemExit: Always. The code is 0 when every download result is
            truthy and 1 otherwise. The code is 2 (via ``parser.error``)
            when the option lists do not all have the same number of entries.
        ValueError: If an entry of ``--sizes`` is not an integer.
    """
    parser = build_parser()
    options = parser.parse_args(args=argv)

    output_paths = [path.strip() for path in options.output_paths.split(',')]
    file_ids = [file_id.strip() for file_id in options.file_ids.split(',')]
    file_count = len(file_ids)

    # Optional lists default to "no expectation" for every file.
    if options.md5sums:
        # Strip like the paths and ids so "a, b" does not yield " b".
        md5sums = [md5sum.strip() for md5sum in options.md5sums.split(',')]
    else:
        md5sums = [None] * file_count
    if options.sizes:
        sizes = [int(size) for size in options.sizes.split(',')]
    else:
        sizes = [None] * file_count

    # zip() stops at the shortest list, so a length mismatch would silently
    # skip files; reject it up front instead.
    for flag, values in (('--output-paths', output_paths),
                         ('--md5sums', md5sums),
                         ('--sizes', sizes)):
        if len(values) != file_count:
            parser.error('{} has {} entries but --file-ids has {}'.format(
                flag, len(values), file_count))

    # The context manager terminates the pool once all results are collected.
    with mp.Pool(file_count) as pool:
        auth_provider = GDCFileAuthProvider()
        # apply_async calls the downloader object in a worker process.
        pending = [
            pool.apply_async(GDCFileDownloader(file_id,
                                               output_path,
                                               auth_provider=auth_provider,
                                               md5sum=md5sum,
                                               expected_file_size=size))
            for output_path, file_id, md5sum, size
            in zip(output_paths, file_ids, md5sums, sizes)
        ]
        # Wait for every download, even after a failure, so the remaining
        # ones are not abandoned when the process exits. An exception raised
        # in a worker is re-raised here by get().
        results = [handle.get() for handle in pending]

    # sys.exit rather than quit(): quit() is added by the site module for
    # interactive use and is not guaranteed to exist.
    if all(results):
        print('Downloads succeeded.')
        sys.exit(0)
    print('Downloads failed.')
    sys.exit(1)
if __name__ == '__main__':
    # Script entry point: pass main() everything after the program name.
    cli_args = sys.argv[1:]
    main(cli_args)