Skip to content

Commit 881b640

Browse files
committed
Automated filetype and analysis selection.
1 parent 2153240 commit 881b640

3 files changed

Lines changed: 122 additions & 70 deletions

File tree

AlignmentProcessor.py

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,13 @@
1919
from glob import glob
2020
import os
2121

22-
def checkInput(axt, kaks, phylip, codeml, outdir):
22+
def checkInput(ref, kaks, codeml, outdir):
2323
'''Makes sure necessary programs are installed and proper file conversion
2424
is run for optional analysis program.'''
2525
proceed = True
26+
if not ref:
27+
print("\n\tError: Please specify a reference species.\n")
28+
proceed = False
2629
if kaks == True:
2730
if codeml == True:
2831
print("\n\tError: Please specify only one analysis program.\n")
@@ -32,10 +35,6 @@ def checkInput(axt, kaks, phylip, codeml, outdir):
3235
print("\n\tError: Please install KaKs_Calculator in the\
3336
AlignmentProcessor bin.\n")
3437
proceed = False
35-
if axt == False:
36-
print("\n\tError: Files must be converted into axt format for use\
37-
with KaKs_Clculator.\n")
38-
proceed = False
3938
if codeml == True:
4039
control = glob(outdir + "*.ctl")
4140
if len(control) == 0:
@@ -52,10 +51,6 @@ def checkInput(axt, kaks, phylip, codeml, outdir):
5251
proceed = False
5352
if phyml == False:
5453
print("\n\tError: Please install and rename PhyML for use\
55-
with CodeML.\n")
56-
proceed = False
57-
if phylip == False:
58-
print("\n\tError: Files must be converted into phylip format for use\
5954
with CodeML.\n")
6055
proceed = False
6156
return proceed
@@ -172,9 +167,9 @@ def main():
172167
help="Specifies that sequences containing internal stop codons should be \
173168
ratained.")
174169
parser.add_argument("--axt", action="store_true",
175-
help="Convert files to axt format.")
170+
help="Convert files to axt format without calling KaKs_Calculator.")
176171
parser.add_argument("--phylip", action="store_true",
177-
help="Convert files to sequential phylip format.")
172+
help="Convert files to sequential phylip format without calling CodeML.")
178173
parser.add_argument("--kaks", action="store_true",
179174
help="Calls KaKs_Calculator on axt files.")
180175
parser.add_argument("-m", default="NG",
@@ -206,15 +201,14 @@ def main():
206201
forward = args.f
207202
ntree = args.n
208203
cleanup = args.cleanUp
209-
# Check inout commands prior to running:
210-
if not ref:
211-
print("\n\tError: Please specify a reference species.\n")
212-
quit()
213-
if axt == True and phylip == True:
214-
print("\n\tError: Please specify only one file type.\n")
215-
quit()
216-
proceed = checkInput(axt, kaks, phylip, codeml, outdir)
204+
# Check input commands prior to running:
205+
proceed = checkInput(ref, kaks, codeml, outdir)
217206
if proceed == True:
207+
# Set axt or phylip
208+
if codeml == True:
209+
phylip = True
210+
elif kaks == True:
211+
axt = True
218212
# Set checkpoint variables to False:
219213
sf = False
220214
ff = False

bin/04_CallCodeML.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,9 @@ def main():
7272
# Save path to the AlignmentProcessor directory
7373
ap = os.getcwd() + "/"
7474
if " " in ap:
75-
# Change to warning ########################################################
76-
print("\tWARNING: AlignmentProcessor will not run properly if there \
77-
is a space in its PATH name.")
78-
ap = ap.replace(" (ASU)", "")
75+
print("\tError: AlignmentProcessor will not run properly if there \
76+
is a space in its PATH name. Exiting.\n")
77+
quit()
7978
run = False
8079
# Parse command
8180
parser = argparse.ArgumentParser(description="Runs CodeML on all files \

bin/ConcatenateCodeML.py

Lines changed: 106 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -3,67 +3,126 @@
33

44
import argparse
55
from glob import glob
6+
import re
67

7-
def concatenate(multiple, pairwise, indir, outfile):
8-
# Parses CodeML output and prints to file
9-
if multiple == True and pairwise == True:
10-
print("\n\tPlease specify only one alignment type.\n")
11-
quit()
12-
elif multiple == True:
13-
# Read in codeml output for multiple alignments
14-
readMultiple(indir, outfile)
15-
elif pairwise == True:
16-
# Read in codeml output for pairwise alignments
17-
readPairwise(indir, outfile)
8+
def inputList(indir):
9+
# Creates list of mlc files and determines which analysis was run
10+
infiles = glob(indir + "*")
11+
# Remove tmp dir
12+
for i in infiles:
13+
if i[i.rfind("/")+1:] == "tmp":
14+
infiles.remove(i)
15+
with open(infiles[0], "r") as ali:
16+
# Identify analysis
17+
for line in ali:
18+
if "dN/dS (w) for site classes" in line:
19+
typ = "brsite"
20+
break
21+
elif "dN & dS for each branch" in line:
22+
typ = "brspec"
23+
break
24+
elif "pairwise comparison, codon frequencies:" in line:
25+
typ = "pw"
26+
break
27+
return infiles, typ
28+
29+
def branchSite(infiles, outfile):
30+
# Saves data from branch site analysis to csv
31+
save = False
32+
with open(outfile, "w") as output:
33+
# Open output file and write header
34+
output.write("TranscriptID,Foreground_dN/dS,Background_dN/dS,\
35+
TreeLength,lnL,Species,SitesUnderSelection (Amino Acid; PosteriorProbability)\n")
36+
for infile in infiles:
37+
try:
38+
# Get gene id and number of species remaining for each file
39+
filename = infile.split("/")[-1]
40+
transcript = filename.split(".")[0]
41+
species = filename.split(".")[1]
42+
if int(species) > 2:
43+
with open(infile, "r") as mlc:
44+
sites = ""
45+
for line in mlc:
46+
# Get substitution rates, tree lengths, etc
47+
if save == True:
48+
if "The grid" in line:
49+
save = False
50+
elif line.strip() and "Positive" not in line:
51+
# Record sites and probability
52+
splt = line.split()
53+
sites += ("{} ({}; {}),").format(splt[0],
54+
splt[1], splt[2])
55+
elif "tree length =" in line:
56+
length = line.split("=")[1].strip()
57+
elif "lnL" in line:
58+
lnl = line.split("):")[1]
59+
lnl = lnl.split()[0].strip()
60+
elif "background w" in line:
61+
# Convert multiple spaces to single space
62+
line = re.sub(" +", " ", line)
63+
bw = line.split()[4]
64+
elif "foreground w" in line:
65+
# Convert multiple spaces to single space
66+
line = re.sub(" +", " ", line)
67+
fw = line.split()[4]
68+
elif "Bayes Empirical Bayes" in line:
69+
save = True
70+
# Append data to list and convert into string
71+
data = [fw, bw, length, lnl, species, sites[:-1]]
72+
for i in data:
73+
transcript += "," + i
74+
output.write(transcript + "\n")
75+
else:
76+
# Skip files with only two sequences
77+
pass
78+
except IsADirectoryError:
79+
pass
1880

19-
def readMultiple(indir, outfile):
81+
def branchSpecific(infiles, outfile):
82+
# Saves data from branch specific analysis to csv
2083
with open(outfile, "w") as output:
2184
# Open output file and write header
2285
output.write("TranscriptID,dN,dS,dN/dS,TreeLength,lnL,Species\n")
23-
infiles = glob(indir + "*")
2486
for infile in infiles:
2587
try:
2688
# Get gene id and number of species remaining for each file
2789
filename = infile.split("/")[-1]
2890
transcript = filename.split(".")[0]
2991
species = filename.split(".")[1]
3092
if int(species) > 2:
93+
with open(infile, "r") as mlc:
94+
for line in mlc:
95+
# Get substitution rates, tree lengths, etc
96+
if "tree length =" in line:
97+
length = line.split("=")[1].strip()
98+
elif "tree length for dN" in line:
99+
dn = line.split(":")[1].strip()
100+
elif "tree length for dS" in line:
101+
ds = line.split(":")[1].strip()
102+
elif "lnL" in line:
103+
lnl = line.split("):")[1]
104+
lnl = lnl.split()[0].strip()
105+
# Calculate dN/dS and save as a string
31106
try:
32-
with open(infile, "r") as mlc:
33-
for line in mlc:
34-
# Get substitution rates, tree lengths, etc
35-
if "tree length =" in line:
36-
length = line.split("=")[1].strip()
37-
elif "tree length for dN" in line:
38-
dn = line.split(":")[1].strip()
39-
elif "tree length for dS" in line:
40-
ds = line.split(":")[1].strip()
41-
elif "lnL" in line:
42-
lnl = line.split("):")[1]
43-
lnl = lnl.split()[0].strip()
44-
# Calculate dN/dS and save as a string
45-
try:
46-
dnds = str(float(dn)/float(ds))
47-
except ZeroDivisionError:
48-
dnds = "NA"
49-
# Append data to list and convert into string
50-
data = [dn, ds, dnds, length, lnl, species]
51-
transcript += ","
52-
for i in data:
53-
transcript += str(i) + ","
54-
transcript = transcript[:-1]
55-
output.write(transcript + "\n")
107+
dnds = str(float(dn)/float(ds))
108+
except ZeroDivisionError:
109+
dnds = "NA"
110+
# Append data to list and convert into string
111+
data = [dn, ds, dnds, length, lnl, species]
112+
for i in data:
113+
transcript += "," + str(i)
114+
output.write(transcript + "\n")
56115
else:
57116
# Skip files with only two sequences
58117
pass
59-
except IsADirectoryError, UnboundLocalError:
118+
except IsADirectoryError:
60119
pass
61120

62-
def readPairwise(indir, outfile):
121+
def pairwise(infiles, outfile):
122+
# Saves data from pairwise analysis to csv
63123
with open(outfile, "w") as output:
64124
# Open output file and write header
65125
output.write("TranscriptID,dN,dS,dN/dS,lnL\n")
66-
infiles = glob(indir + "*")
67126
for infile in infiles:
68127
try:
69128
# Get gene id and number of species remaining for each file
@@ -94,10 +153,6 @@ def readPairwise(indir, outfile):
94153
def main():
95154
parser = argparse.ArgumentParser(description="This script will \
96155
concatenate CodeML output and print them in a single file.")
97-
parser.add_argument("--multiple", action="store_true",
98-
help="Indicates that a multiple alignment was used")
99-
parser.add_argument("--pairwise", action="store_true",
100-
help="Indicates that a pairwise alignment was used.")
101156
parser.add_argument("-i", help="Path to CodeML output directory")
102157
parser.add_argument("-o", help="Name of output file.")
103158
# Parse command
@@ -106,9 +161,13 @@ def main():
106161
if indir[-1] != "/":
107162
indir += "/"
108163
outfile = args.o
109-
multiple = args.multiple
110-
pairwise = args.pairwise
111-
concatenate(multiple, pairwise, indir, outfile)
164+
infiles, typ = inputList(indir)
165+
if typ == "pw":
166+
pairwise(infiles, outfile)
167+
elif typ == "brsite":
168+
branchSite(infiles, outfile)
169+
elif typ == "brspec":
170+
branchSpecific(infiles, outfile)
112171

113172
if __name__ == "__main__":
114173
main()

0 commit comments

Comments
 (0)