-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathanalyze-SR.py
More file actions
executable file
·118 lines (107 loc) · 3.72 KB
/
analyze-SR.py
File metadata and controls
executable file
·118 lines (107 loc) · 3.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/bin/env python
#=========================================================================
# This is OPEN SOURCE SOFTWARE governed by the Gnu General Public
# License (GPL) version 3, as described at www.opensource.org.
# Copyright (C)2017 William H. Majoros (martiandna@gmail.com).
#=========================================================================
from __future__ import (absolute_import, division, print_function,
unicode_literals, generators, nested_scopes, with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii,
chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
# The above imports should allow this program to run in both Python 2 and
# Python 3. You might need to update your version of module "future".
import sys
import ProgramName
from Shuffler import Shuffler
def loadBetas(filename):
    """Load scored sequences and return the mean score of every substring.

    The first three lines of the file are skipped without being parsed.
    Every remaining non-blank line must hold exactly two
    whitespace-separated fields: a sequence and its score, where a score
    of "." is read as 0.0.  Each (sequence, score) pair is handed to
    setSubstrings(), which accumulates the score under every contiguous
    substring of the sequence.

    Parameters:
        filename: path of the text file to read.

    Returns:
        dict mapping each substring seen to its accumulated score divided
        by the number of times it was accumulated.

    Raises:
        ValueError: if a non-blank line does not split into exactly two
            fields, or its score is neither "." nor parseable as a float.
    """
    sums = {}
    counts = {}
    with open(filename, "rt") as IN:
        # Skip the first three lines (presumably a header; never parsed).
        for _ in range(3):
            IN.readline()
        for line in IN:
            fields = line.split()
            if not fields:
                # A blank line (e.g. a trailing empty line) carries no
                # record; unpacking it below would raise ValueError.
                continue
            (kmer, score) = fields
            score = 0.0 if score == "." else float(score)
            setSubstrings(kmer, score, sums, counts)
    # Every key of sums was given a count of at least 1.0 by
    # setSubstrings(), so the division cannot hit zero.
    return {kmer: sums[kmer] / counts[kmer] for kmer in sums}
def setSubstrings(hex, score, sums, counts):
    """Accumulate `score` under every contiguous substring of `hex`.

    For each (start, end) slice of `hex`, `score` is added to
    sums[substring] and 1.0 is added to counts[substring]; entries that do
    not exist yet start from 0.0.  A substring occurring at several
    positions is therefore accumulated once per position.  Both
    dictionaries are modified in place and nothing is returned.

    Parameters:
        hex: the sequence whose substrings are enumerated.
        score: numeric value to add for each substring occurrence.
        sums: dict of substring -> running score total (updated in place).
        counts: dict of substring -> running occurrence count (updated in
            place).
    """
    total = len(hex)
    for begin in range(total):
        # Widths 1..(total-begin) visit the same slices, in the same order,
        # as iterating the end index from begin+1 through total.
        for width in range(1, total - begin + 1):
            piece = hex[begin:begin + width]
            sums[piece] = sums.setdefault(piece, 0.0) + score
            counts[piece] = counts.setdefault(piece, 0.0) + 1.0
def trimMotifs(motifs, size):
    """Break each motif into all of its windows of length `size`.

    A motif shorter than `size` is kept whole.  Any other motif is
    replaced by its consecutive windows of `size` characters, taken at
    every start offset from left to right (a motif of exactly `size`
    characters yields itself once).

    Parameters:
        motifs: iterable of motif strings.
        size: window length.

    Returns:
        A new list holding the kept motifs and windows, in input order.
    """
    trimmed = []
    for motif in motifs:
        windowCount = len(motif) - size + 1
        if windowCount < 1:
            # Too short to hold even one window: pass through unchanged.
            trimmed.append(motif)
            continue
        trimmed.extend(motif[start:start + size]
                       for start in range(windowCount))
    return trimmed
# Concrete nucleotides denoted by each ambiguity symbol that IUPAC() expands.
# Symbols absent from this table are copied through unchanged.
_IUPAC_EXPANSIONS = {
    "N": "ACGT",
    "R": "AG",
    "Y": "CT",
    "D": "AGT",
    "K": "TG",
    "M": "AC",
    "S": "GC",
    "W": "AT",
    "B": "CGT",
    "H": "ACT",
    "V": "ACG",
    "U": "T",
}


def IUPAC(motifs):
    """Expand ambiguity symbols in `motifs` into all concrete sequences.

    Every symbol listed in _IUPAC_EXPANSIONS is replaced by each of its
    alternatives, so a motif with several ambiguous positions yields the
    Cartesian product of the alternatives at those positions.  Symbols not
    in the table are left as they are.  Duplicates (within one motif's
    expansion or across motifs) are collapsed.

    Parameters:
        motifs: iterable of motif strings.

    Returns:
        A sorted list of the distinct expanded motifs.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    from itertools import product

    expanded = set()
    for motif in motifs:
        # One string of alternatives per position; an unlisted symbol is
        # its own single alternative.
        choices = [_IUPAC_EXPANSIONS.get(symbol, symbol) for symbol in motif]
        for combo in product(*choices):
            expanded.add("".join(combo))
    # Sorting makes the output order reproducible instead of following
    # set iteration order.
    return sorted(expanded)
def replace(alts, symbol, replacements):
    """Expand, in place, every entry of `alts` that contains `symbol`.

    The list is scanned from the front.  An entry containing `symbol` is
    removed and generate() appends its substituted variants to the end of
    the list; those appended entries are examined later in the same scan,
    so when the scan finishes no entry contains `symbol`.  The scan only
    terminates if the generated variants eventually stop containing
    `symbol` (i.e. `symbol` is not itself among `replacements`).

    Parameters:
        alts: list of motif strings, modified in place.
        symbol: the character to eliminate.
        replacements: the characters substituted for `symbol`.

    Returns:
        True if at least one entry was expanded, otherwise False.
    """
    expandedAny = False
    cursor = 0
    while cursor < len(alts):
        if symbol in alts[cursor]:
            # Removing the entry shifts the next one into this slot, so
            # the cursor deliberately stays where it is.
            ambiguous = alts.pop(cursor)
            generate(ambiguous, symbol, replacements, alts)
            expandedAny = True
        else:
            cursor += 1
    return expandedAny
def generate(motif, symbol, replacements, alts):
    """Append single-position substitutions of `symbol` in `motif` to `alts`.

    For each position of `motif` holding `symbol`, and for each character
    of `replacements`, a copy of `motif` with only that one position
    replaced is appended to `alts`.  Other occurrences of `symbol` in the
    copy are left untouched.  Nothing is returned.

    Parameters:
        motif: the string to derive variants from.
        symbol: the character to substitute.
        replacements: iterable of characters to put in its place.
        alts: list that receives the variants (modified in place).
    """
    for position, current in enumerate(motif):
        if current != symbol:
            continue
        head = motif[:position]
        tail = motif[position + 1:]
        alts.extend(head + substitute + tail for substitute in replacements)
def loadMotifs(filename, shuffle=None):
    """Read one motif per line from `filename`, optionally shuffling each.

    Trailing whitespace is stripped from every line and lines that are
    empty afterwards are skipped.  When shuffling is enabled each motif is
    passed through Shuffler.shuffleString() before being stored.

    Parameters:
        filename: path of the text file to read.
        shuffle: truthy to shuffle each motif, falsy to keep it as read.
            When omitted (None) the module-level `wantShuffle` setting is
            used, which is how the function behaved before this parameter
            existed.

    Returns:
        List of the non-empty motifs, in file order.

    Raises:
        NameError: if `shuffle` is omitted and `wantShuffle` has not been
            assigned at module level.
    """
    if shuffle is None:
        # Fall back to the script-level setting so that existing
        # single-argument calls behave as before.
        shuffle = wantShuffle
    motifs = []
    with open(filename, "rt") as IN:
        for line in IN:
            motif = line.rstrip()
            if not motif:
                # Blank line: nothing to store, and nothing worth shuffling.
                continue
            if shuffle:
                motif = Shuffler.shuffleString(motif)
            if len(motif) > 0:
                motifs.append(motif)
    return motifs
#=========================================================================
# main()
#=========================================================================
# Exactly three command-line arguments are required.
if len(sys.argv) != 4:
    # sys.exit() instead of the bare exit(): the latter is injected by the
    # site module for interactive use and is not guaranteed to exist.
    sys.exit(ProgramName.get() + " <betas.txt> <motifs.txt> <shuffle:0|1>\n")
(betaFile, motifFile, wantShuffle) = sys.argv[1:]
# wantShuffle must stay a module-level name: loadMotifs() reads it as a
# global when deciding whether to shuffle.
wantShuffle = int(wantShuffle)
betas = loadBetas(betaFile)
motifs = loadMotifs(motifFile)
motifs = trimMotifs(motifs, 6)
motifs = IUPAC(motifs)
# Emit one "<score>\t<motif>" line per expanded motif.
for motif in motifs:
    # A motif that is not a key of betas raises KeyError here.
    score = betas[motif]
    print(score, motif, sep="\t")