|
17 | 17 | #################################################################### |
18 | 18 | ## Parsing command line options #################################### |
19 | 19 | #################################################################### |
20 | | -# USAGE: ./genome_info.pl --species "Homo sapiens" --outfile ../../appl_db/ird_v1/hsa ... |
| 20 | +##docker run -it --mount type=bind,source=/tmp,target=/tmp opencb/cellbase-builder:6.2.0-SNAPSHOT /opt/cellbase/scripts/ensembl-scripts/genome_info.pl --species "Mus musculus" --assembly GRCm39 --outfile /tmp |
| 21 | + |
| 22 | +# USAGE: ./genome_info.pl --species "Homo sapiens" --assembly GRCh38 --outfile ../../appl_db/ird_v1/hsa ... |
21 | 23 |
|
22 | 24 | ## Parsing command line |
23 | 25 | GetOptions ('species=s' => \$species, 'assembly=s' => \$assembly, 'o|outfile=s' => \$outfile, 'phylo=s' => \$phylo, |
|
29 | 31 |
|
30 | 32 | if ($outfile eq "") { |
31 | 33 | $outfile = "/ensembl-data/genome_info.json"; |
32 | | - # $outfile = "/ensembl-data/$species.json"; |
33 | 34 | } |
34 | 35 |
|
35 | 36 | #################################################################### |
|
42 | 43 | # Bio::EnsEMBL::Registry->load_all("$ENSEMBL_REGISTRY"); |
43 | 44 | if($phylo eq "" || $phylo eq "vertebrate") { |
44 | 45 | print ("In vertebrates section\n"); |
45 | | - if ($species eq "Homo sapiens" && $assembly eq "GRCh38") { |
46 | | - print ("Human selected, assembly ".$assembly." selected, connecting to port ".$ENSEMBL_PORT."\n"); |
47 | | - Bio::EnsEMBL::Registry->load_registry_from_db( |
48 | | - -host => $ENSEMBL_HOST, |
49 | | - -user => $ENSEMBL_USER, |
50 | | - -port => $ENSEMBL_PORT, |
51 | | - -verbose => $verbose |
52 | | - ); |
53 | | - } else { |
54 | | - print ("Human selected, assembly ".$assembly." no supported\n"); |
55 | | - } |
| 46 | + print ("Species: ".$species.", assembly ".$assembly.", connecting to: ".$ENSEMBL_HOST.":".$ENSEMBL_PORT."\n"); |
| 47 | + Bio::EnsEMBL::Registry->load_registry_from_db( |
| 48 | + -host => $ENSEMBL_HOST, |
| 49 | + -user => $ENSEMBL_USER, |
| 50 | + -port => $ENSEMBL_PORT, |
| 51 | + -verbose => $verbose |
| 52 | + ); |
56 | 53 | } else { |
57 | 54 | print ("In no-vertebrates section\n"); |
58 | 55 | Bio::EnsEMBL::Registry->load_registry_from_db( |
|
64 | 61 |
|
65 | 62 | my $slice_adaptor = Bio::EnsEMBL::Registry->get_adaptor($species, "core", "Slice"); |
66 | 63 | my $karyotype_adaptor = Bio::EnsEMBL::Registry->get_adaptor($species, "core", "KaryotypeBand"); |
67 | | -# my $gene_adaptor = Bio::EnsEMBL::Registry->get_adaptor($species, "core", "Gene"); |
68 | 64 | #################################################################### |
69 | 65 |
|
70 | 66 | my %info_stats = (); |
|
81 | 77 | $chromosome{'start'} = int($chrom->start()); |
82 | 78 | $chromosome{'end'} = int($chrom->end()); |
83 | 79 | $chromosome{'size'} = int($chrom->seq_region_length()); |
84 | | -# $chromosome{'numberGenes'} = scalar @{$chrom->get_all_Genes()}; |
85 | 80 | $chromosome{'isCircular'} = $chrom->is_circular(); |
86 | 81 |
|
87 | 82 | my @cytobands = (); |
88 | 83 | foreach my $cyto(@{$karyotype_adaptor->fetch_all_by_chr_name($chrom->seq_region_name)}) { |
89 | | -# print $cytoband->name."\n"; |
90 | 84 | my %cytoband = (); |
91 | 85 | $cytoband{'name'} = $cyto->name(); |
92 | 86 | $cytoband{'start'} = int($cyto->start()); |
|
96 | 90 | push(@cytobands, \%cytoband); |
97 | 91 | } |
98 | 92 |
|
99 | | - ## check if any cytoband has been added |
| 93 | + ## Check if any cytoband has been added |
100 | 94 | ## If not, a single cytoband covering the whole chromosome is added. |
101 | 95 | if(@cytobands == 0) { |
102 | 96 | my %cytoband = (); |
|
110 | 104 | $chromosome{'cytobands'} = \@cytobands; |
111 | 105 |
|
112 | 106 | push(@chromosomes, \%chromosome); |
113 | | -# push(@chrom_ids, $chrom->seq_region_name); |
114 | 107 | } |
115 | 108 | $info_stats{'chromosomes'} = \@chromosomes; |
116 | 109 |
|
|
124 | 117 | $supercontig{'start'} = int($supercon->start()); |
125 | 118 | $supercontig{'end'} = int($supercon->end()); |
126 | 119 | $supercontig{'size'} = int($supercon->seq_region_length()); |
127 | | -# $supercontig{'numberGenes'} = scalar @{$supercon->get_all_Genes()}; |
128 | 120 | $supercontig{'isCircular'} = $supercon->is_circular(); |
129 | 121 |
|
130 | 122 | ## A single cytoband covering the whole chromosome is added. |
|
151 | 143 |
|
152 | 144 | sub print_parameters { |
153 | 145 | print "Parameters: "; |
154 | | - print "species: $species, outfile: $outfile, "; |
| 146 | + print "species: $species, assembly: $assembly, outfile: $outfile, "; |
155 | 147 | print "ensembl-registry: $ENSEMBL_REGISTRY, "; |
156 | 148 | print "ensembl-host: $ENSEMBL_HOST, ensembl-port: $ENSEMBL_PORT, "; |
157 | 149 | print "ensembl-user: $ENSEMBL_USER, verbose: $verbose, help: $help"; |
|
0 commit comments