-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathorganizeKO.py
More file actions
80 lines (62 loc) · 2.39 KB
/
organizeKO.py
File metadata and controls
80 lines (62 loc) · 2.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#Author: Michael Hoffert
#Date: 6/15/2017
#Credit to Rika Anderson
#Python version 2.7.12
"""Makes dictionary for organizing KEGG Orthologies"""
def parse_KO(keg):
    """Parse a KEGG Orthology hierarchy (.keg) file into nested dictionaries.

    The first eight lines of the file are skipped as header. Every later
    line is dispatched on its first character:

    * ``A`` -- top-level category; its name is the text between ``<b>``
      and ``</b>``.
    * ``B`` -- second-level category, named the same way.
    * ``C`` -- third-level entry of the form ``<id> <name>``, optionally
      followed by ``[PATH:...]`` or ``[BR:...]``; only ``<name>`` is kept.
    * ``D`` -- orthology entry; the six characters starting at the first
      ``K`` are the KO identifier and the rest of the line is its
      description.

    ``B`` and ``C`` lines that carry nothing after the level letter are
    skipped, and lines starting with any other character are ignored.
    ``C`` names and ``D`` descriptions are stored without surrounding
    whitespace or the line terminator.

    Args:
        keg: Path of the .keg file to read.

    Returns:
        A dictionary shaped ``{A: {B: {C: {KO: description}}}}``.

    Raises:
        ValueError: If an ``A`` line or a non-empty ``B`` line lacks the
            ``<b>``/``</b>`` markers, or a ``D`` line contains no ``K``.
        KeyError: If an entry appears before its parent level was seen.
    """
    hierarchy = {}
    # Read inside a context manager so the handle is closed even on error.
    with open(keg) as handle:
        keg_lines = handle.readlines()

    # Most recently seen name at each level; children attach to these.
    A = B = C = ''
    for raw_line in keg_lines[8:]:
        # Drop the terminator up front so that it never leaks into a name
        # and so that a final line without a newline is not truncated.
        line = raw_line.rstrip('\r\n')
        level = line[:1]
        if level == 'A':
            A = _bold_text(line)
            hierarchy[A] = {}
        elif level == 'B':
            if line[1:].strip():
                B = _bold_text(line)
                hierarchy[A][B] = {}
        elif level == 'C':
            if line[1:].strip():
                C = _category_name(line)
                hierarchy[A][B][C] = {}
        elif level == 'D':
            x = line.index('K')
            KO = line[x:x + 6]
            hierarchy[A][B][C][KO] = line[x + 6:].strip()
    return hierarchy


def _bold_text(line):
    """Return the text between the first ``<b>`` and ``</b>`` in *line*."""
    start = line.index('<b>')
    stop = line.index('</b>')
    return line[start + 3:stop]


def _category_name(line):
    """Return the name on a ``C`` line, without its id or bracketed tag."""
    # The first whitespace-delimited field after the level letter is the
    # numeric id; splitting on it works whatever digit the id starts with.
    fields = line[1:].split(None, 1)
    name = fields[1] if len(fields) > 1 else ''
    for tag in ('[BR:', '[PATH:'):
        cut = name.find(tag)
        if cut != -1:
            name = name[:cut]
    return name.strip()
#Assigns KEGG orthologies to ORFs from organized dict
def assign_KO_to_ORFs(ORFs, d):
    """Annotate ORFs with the KEGG hierarchy entry that matches their KO.

    For every ORF whose ``KO`` attribute is not ``None``, the first three
    characters of ``KO`` are dropped (presumably a prefix such as ``KO:``
    -- confirm against the caller) and the remainder is looked up among
    the KO identifiers in *d*. On a match the ORF receives ``KO_A``,
    ``KO_B`` and ``KO_C`` (the three enclosing category names) and
    ``KO_data`` (the description stored for that KO). ORFs without a match
    are left untouched.

    When a KO is listed under several categories, the last one in the
    iteration order of *d* wins.

    Args:
        ORFs: Iterable of objects exposing a ``KO`` attribute.
        d: Nested dictionary shaped ``{A: {B: {C: {KO: description}}}}``.

    Returns:
        The same *ORFs* object that was passed in; ORFs are modified in
        place.
    """
    # Flatten the hierarchy once instead of rescanning all four levels for
    # every ORF. Later entries overwrite earlier ones, which keeps the
    # "last match wins" outcome of an exhaustive scan.
    lookup = {}
    for A, b_level in d.items():
        for B, c_level in b_level.items():
            for C, orthologs in c_level.items():
                for ko_id, description in orthologs.items():
                    lookup[ko_id] = (A, B, C, description)

    for orf in ORFs:
        if orf.KO is None:
            continue
        hit = lookup.get(orf.KO[3:])
        if hit is not None:
            orf.KO_A, orf.KO_B, orf.KO_C, orf.KO_data = hit
    return ORFs
if __name__ == '__main__':
    # Script entry point: parse the hierarchy file named by a relative
    # path. The resulting dictionary is discarded, so running the module
    # only shows whether the file parses without raising.
    parse_KO('ko00001.keg')