-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparse_smurf_results.rb
More file actions
executable file
·74 lines (62 loc) · 1.97 KB
/
parse_smurf_results.rb
File metadata and controls
executable file
·74 lines (62 loc) · 1.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/env ruby
# takes 2 arguments: results directory, threshold
results_dir = ARGV[0]
results = []
threshold = ARGV[1].to_f
SCOP = '/r/bcb/protein_structure/SCOP/dir.des.scop.txt_1.75.txt'
def parse_file(filename, superfamily, threshold)
results = []
name = ''
alignment = ''
rawscore = nil
pval = nil
File.foreach(filename) do |line|
if line =~ /^>/
# new entry
# close out last one
if name
results << {:name => name, :alignment => alignment, :raw_score => rawscore, :p_value => pval} if pval && pval <= threshold
end
name = line.chomp.gsub(/^>/,'')
pval = nil
rawscore = nil
alignment = ''
elsif line =~ /Raw score: (-?\d+\.?\d+)/
rawscore = Regexp.last_match[1].to_f
elsif line =~ /P-value: ([-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)/
pval = Regexp.last_match[1].to_f
else # part of alignment
alignment += line
end
end
results << {:name => name, :alignment => alignment, :raw_score => rawscore, :p_value => pval} if pval && pval <= threshold
results.sort!{|a,b| a[:p_value] <=> b[:p_value]}
{:superfamily => superfamily, :results => results}
end
Dir.glob(File.join(results_dir, '*')).each do |superfamily_dir|
superfamily = File.basename(superfamily_dir)
next unless File.directory?(superfamily_dir)
Dir.glob(File.join(superfamily_dir, '*.out')).each do |smurf_file|
results << parse_file(smurf_file, superfamily, threshold)
end
end
scop = {}
File.foreach(SCOP) do |line|
next if line =~ /^#/
# 46461 sp a.1.1.1 - Ciliate (Paramecium caudatum) [TaxId: 5885]
sunid, kind, sccs, chain, rest = line.chomp.split(/\s+/, 5)
if kind == 'sf'
scop[sunid] = rest
end
end
counter = 0
results.each do |r|
puts "Superfamily #{r[:superfamily]}: #{scop[r[:superfamily]]}" unless r[:results].empty?
r[:results].each do |result|
puts result[:name]
puts result[:p_value]
puts
counter += 1
end
end
puts "#{counter} good hits"