forked from Maggi-Chen/FusionSeeker
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconstruct_gtf.py
More file actions
71 lines (60 loc) · 1.68 KB
/
construct_gtf.py
File metadata and controls
71 lines (60 loc) · 1.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def sortgeneinfo(a):
    """Sort key for gene records: return the first element of *a*.

    In this module the records being sorted are lists whose first element
    is the integer start coordinate of the gene.
    """
    first_field = a[0]
    return first_field
def write_geneinfo(outfile='testlast.ds'):
    """Dump the module-level ``geneinfo`` table to a tab-separated file.

    One line is written per gene record, in the form::

        chrom <TAB> start <TAB> end <TAB> name <TAB> strand <TAB> exons

    where ``exons`` is the concatenation of ``type,start,end;`` for every
    entry in the record's sub-feature list (empty when there are none).
    The layout is the one parsed back by ``readgeneinfo``.

    Args:
        outfile: Path of the file to (over)write. Defaults to
            ``'testlast.ds'``, the location the function always used.
    """
    # The context manager guarantees the handle is closed even if a record
    # is malformed and formatting raises part-way through.
    with open(outfile, 'w') as out:
        for chrom in geneinfo:
            for gene in geneinfo[chrom]:
                exon_field = ''.join(
                    str(m[0]) + ',' + str(m[1]) + ',' + str(m[2]) + ';'
                    for m in gene[4]
                )
                columns = [chrom, str(gene[0]), str(gene[1]),
                           gene[2], gene[3], exon_field]
                out.write('\t'.join(columns) + '\n')
def readgeneinfo(geneinfofilename):
    """Load a gene table written by ``write_geneinfo``.

    Each non-empty line must hold six tab-separated columns:
    ``chrom, start, end, name, strand, exons`` where ``exons`` is a
    ``;``-separated list of ``type,start,end`` triples (possibly empty).

    The parsed table is stored in the module-level ``geneinfo`` and also
    returned.

    Args:
        geneinfofilename: Path of the file to read.

    Returns:
        dict mapping chromosome name to a list of records
        ``[start, end, name, strand, [[type, start, end], ...]]`` in file
        order.
    """
    global geneinfo
    with open(geneinfofilename, 'r') as infile:
        geneinfo = {}
        for line in infile:
            line = line.rstrip('\n')
            # Skip empty lines instead of blindly discarding the last
            # split element, so a file without a trailing newline does
            # not lose its final record.
            if not line:
                continue
            fields = line.split('\t')
            record = [int(fields[1]), int(fields[2]), fields[3], fields[4]]
            exons = []
            for item in fields[5].split(';'):
                # Same idea for the exon list: ignore the empty piece
                # after a trailing ';' but keep a final unterminated one.
                if not item:
                    continue
                parts = item.split(',')
                exons.append([parts[0], int(parts[1]), int(parts[2])])
            record.append(exons)
            geneinfo.setdefault(fields[0], []).append(record)
    return geneinfo
def create(gtfinfo,goodchrom,usegeneid,writeds=False):
    """Build the per-chromosome gene table from GTF-style annotation lines.

    Lines are tab-separated; column 0 is the chromosome, column 2 the
    feature type, columns 3/4 the start/end, column 6 the strand, and the
    gene identifier is taken from a ``gene_id "..."`` or
    ``gene_name "..."`` attribute in the line. ``transcript`` features are
    ignored. Consecutive lines carrying the same gene identifier form one
    gene: the first line supplies the gene's coordinates and strand, and
    every following line is recorded as a sub-feature
    ``[feature_type, start, end]``.

    The result is stored in the module-level ``geneinfo`` and returned.

    Args:
        gtfinfo: Iterable of annotation lines (strings).
        goodchrom: Chromosome names to keep; each becomes a key of the
            result, even when no line matches it.
        usegeneid: If true, group by ``gene_id``; otherwise group by
            ``gene_name`` and skip lines lacking that attribute.
        writeds: If true, call ``write_geneinfo()`` after building.

    Returns:
        dict mapping chromosome to a list of
        ``[start, end, gene, strand, [[feature_type, start, end], ...]]``
        sorted by start coordinate.

    Raises:
        IndexError: if ``usegeneid`` is true and a kept line has no
            ``gene_id`` attribute, or a kept line has too few columns.
    """
    global geneinfo
    chroms = list(goodchrom)
    wanted = set(chroms)

    # Bucket the relevant lines by chromosome in a single pass rather than
    # re-scanning (and re-splitting) the whole annotation once per chromosome.
    events_by_chrom = {chrom: [] for chrom in chroms}
    for line in gtfinfo:
        fields = line.split('\t')
        if fields[0] not in wanted:
            continue
        if fields[2] == 'transcript':
            continue
        events_by_chrom[fields[0]].append(line)

    attribute = 'gene_id "' if usegeneid else 'gene_name "'
    geneinfo = {}
    for chrom in chroms:
        genes = []
        lastgene = None  # None means no gene has been opened yet
        genepos = None
        exonpos = []
        for event in events_by_chrom[chrom]:
            try:
                genename = event.split(attribute)[1].split('"')[0]
            except IndexError:
                if usegeneid:
                    # A missing gene_id is treated as an error, as before.
                    raise
                # Lines without a gene_name are skipped on purpose.
                continue
            fields = event.split('\t')
            start, end = int(fields[3]), int(fields[4])
            if genename == lastgene:
                exonpos.append([fields[2], start, end])
                continue
            if lastgene is not None:
                genes.append(genepos + [exonpos])
            lastgene = genename
            genepos = [start, end, genename, fields[6]]
            exonpos = []
        # Flush the final gene whenever one is open, including a gene that
        # has no sub-features; genes earlier in the chromosome are kept in
        # that situation, so the last one must be as well.
        if lastgene is not None:
            genes.append(genepos + [exonpos])
        genes.sort(key=sortgeneinfo)
        geneinfo[chrom] = genes

    if writeds:
        write_geneinfo()
    return geneinfo