-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathcreate_sarek_samplesheet.py
More file actions
executable file
·44 lines (33 loc) · 1.19 KB
/
create_sarek_samplesheet.py
File metadata and controls
executable file
·44 lines (33 loc) · 1.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/usr/bin/env python
import os
import sys
import re
# FASTQ file name as matched below: <sample>_S<n>_L<lane>_R<read>_001<anything>
# Groups: 1 = sample name, 2 = "S<n>" index, 3 = lane digits, 4 = "R<read>".
fqpattern = r'^(.*)_(S\d+)_L(\d+)_(R\d)_001.*$'
# Applied to the directory path that holds the FASTQ files: everything after the
# last underscore, minus an optional leading "A"/"B", is used as flowcell id.
fcpattern = r'^.*_[AB]?([^_]+)$'


def _resolve_fastq(root, name):
    """Return the path to record in the samplesheet for entry *name* in *root*.

    Symlinks are replaced by their target, as the script has always done. A
    relative target is anchored at the link's own directory, because that is
    what it is relative to (not the current working directory). Regular files
    are recorded by their absolute path instead of crashing ``os.readlink``.
    """
    path = os.path.join(root, name)
    if not os.path.islink(path):
        return os.path.abspath(path)
    target = os.readlink(path)
    if os.path.isabs(target):
        return target
    return os.path.join(os.path.abspath(root), target)


def collect_read_groups(fqdir):
    """Walk *fqdir* and group FASTQ files into read groups.

    Returns a dict keyed by ``"<flowcell>.<lane>.<S-index>"`` whose values are
    dicts with the keys ``"sample"``, ``"R1"`` and ``"R2"``; a read file that
    was not found stays an empty string. Files whose name does not match
    ``fqpattern`` are ignored.
    """
    sampledict = {}
    for root, dirs, fqlinks in os.walk(fqdir):
        # Sorting in place makes os.walk descend in a fixed order, so the
        # generated samplesheet is reproducible between runs and filesystems.
        dirs.sort()
        # The flowcell id depends only on the directory, so derive it once.
        m = re.match(fcpattern, root)
        fcid = m.group(1) if m else "NA"
        for link in sorted(fqlinks):
            fqbasenm = re.match(fqpattern, link)
            if not fqbasenm:
                # Filter on the name first; only matching entries are resolved.
                continue
            sample, ssheet_idx, lane, readnr = fqbasenm.groups()
            laneno = int(lane)  # drops the zero padding: "001" -> 1
            # PU, Platform unit. Will be used as read tag in BAM-files.
            readgrp = f"{fcid}.{laneno}.{ssheet_idx}"
            entry = sampledict.setdefault(
                readgrp, {"sample": sample, "R1": "", "R2": ""}
            )
            entry[readnr] = _resolve_fastq(root, link)
    return sampledict


def write_samplesheet(sampledict, outfile):
    """Write *sampledict* to *outfile* as CSV, one row per read group.

    The sample name is written to both the ``patient`` and ``sample`` columns
    and the read-group id to the ``lane`` column.
    """
    with open(outfile, 'w') as fout:
        fout.write("patient,sample,lane,fastq_1,fastq_2\n")
        for readgrp, data in sampledict.items():
            samplenm = data["sample"]
            entry = ",".join([samplenm, samplenm, readgrp, data["R1"], data["R2"]])
            fout.write(f"{entry}\n")


def main(argv=None):
    """Command-line entry point: ``<fastq_dir> <output_csv>``.

    *argv* defaults to ``sys.argv[1:]``. Arguments after the first two are
    ignored, as before. Exits with a usage message when fewer than two are
    given.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        sys.exit(f"usage: {os.path.basename(sys.argv[0])} <fastq_dir> <output_csv>")
    fqdir, outfile = args[0], args[1]
    write_samplesheet(collect_read_groups(fqdir), outfile)


if __name__ == "__main__":
    main()