-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathfind_genes.rb
More file actions
executable file
·49 lines (40 loc) · 1.38 KB
/
find_genes.rb
File metadata and controls
executable file
·49 lines (40 loc) · 1.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/ruby
# Locate COX pathway genes in GEO datasets and count their occurrences
# USAGE: for i in results/*.csv;do (echo $i;./find_genes.rb data/COX_pathway_genesids.csv $i);done
require 'rubygems'
require 'fastercsv'
# Command-line arguments:
#   ARGV[0] - CSV file listing the COX pathway genes
#   ARGV[1] - CSV file of the dataset to scan
# Fail with a usage message instead of a NoMethodError on nil when either
# argument is missing.
if ARGV.length < 2
  abort "USAGE: #{$0} <cox_pathway_genes.csv> <dataset.csv>"
end
cox_pathway_genes_file = ARGV[0]
infile = ARGV[1]
# Name the output after the input file itself (the text before its first "."),
# wherever that file lives. File.basename drops every directory component, so
# bare file names ("x.csv") and nested paths ("a/b/x.csv") work as well as the
# "results/x.csv" form shown in the usage line.
filename = File.basename(infile).split(".")[0].to_s
found_genes_output_file = "results/found_genes/#{filename}_found_genes.csv"
# read cox pathway genes
# cox_genes is used as a set: only its keys are consulted, every value is nil.
cox_genes = {}
FasterCSV.foreach(cox_pathway_genes_file) do |gene|
  # `gene` is a whole CSV row, not a single field. Array#to_s concatenated the
  # fields on Ruby 1.8 but returns the inspect form (e.g. ["PTGS2"]) on 1.9+,
  # which would never match a gene symbol. An explicit join keeps the 1.8
  # result on every Ruby version.
  cox_genes[gene.join] = nil
end
# find COX genes through the probes and count them
# gene_expressions: "SYMBOL|<first column>" => list of rows that matched SYMBOL
#                   (the first column is presumably the probe id -- confirm
#                   against the dataset layout)
# genes_appearance: SYMBOL => number of matches found for it
gene_expressions = Hash.new { |h, k| h[k] = [] }
genes_appearance = {}
FasterCSV.foreach(infile) do |row|
  symbols_field = row[1]
  # Blank lines and rows without a second column carry no gene symbols; skip
  # them instead of raising NoMethodError on nil.
  next if symbols_field.nil?
  # each_line replaces the Ruby 1.8-only String#each (same line-by-line
  # iteration, available on every Ruby version).
  symbols_field.each_line do |gene|
    # chomp: each_line keeps the line terminator, which would otherwise stay
    # glued to the last symbol of the line and prevent it from matching.
    gene.chomp.split(" /// ").each do |gene_symbol|
      next unless cox_genes.has_key?(gene_symbol)
      # Interpolation turns a nil first column into "" rather than raising
      # TypeError as String#+ would.
      gene_expressions["#{gene_symbol}|#{row[0]}"] << row
      # count COX genes appearance through the probes
      genes_appearance[gene_symbol] = (genes_appearance[gene_symbol] || 0) + 1
    end
  end
end
# print cox genes expressions
# Writes one line per gene_expressions key, in sorted key order: the gene
# symbol (the key text before the first "|") followed by every field of the
# rows collected under that key.
# NOTE: fields are joined with plain commas and are not CSV-quoted.
# The block form of File.open closes the file even if a write raises.
File.open(found_genes_output_file, "w") do |found_genes_output|
  gene_expressions.sort.each do |key, rows|
    gene_symbol = key.split("|")[0].to_s
    found_genes_output.puts gene_symbol + "," + rows.join(",")
  end
end