Skip to content

Commit 23e0662

Browse files
committed
v0.8.3
1 parent 111ff3e commit 23e0662

2 files changed

Lines changed: 129 additions & 34 deletions

File tree

norm_scripts/restart_failedjobs_only.pl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,22 @@
1919
die $USAGE;
2020
}
2121
my @list;
22+
my $qopt=0;
2223
for(my $i=4;$i<@ARGV;$i++){
2324
my $option_rec = "false";
2425
if ($ARGV[$i] eq '-qlist'){
2526
$option_rec = "true";
2627
@list = split(",", $ARGV[$i+1]);
2728
$i++;
29+
$qopt++;
2830
}
2931
if ($option_rec eq 'false'){
3032
die "option \"$ARGV[$i]\" not recognized\n";
3133
}
3234
}
33-
35+
if ($qopt == 0){
36+
die "Please provide -qlist '3G,6G,10G,15G,30G,45G,60G\n";
37+
}
3438
my $dirs = $ARGV[0];
3539
my $LOC = $ARGV[1];
3640
my $errname = $ARGV[2];

norm_scripts/runall_normalization.pl

Lines changed: 124 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,17 @@
317317
$cutoff_le = $cutoff_temp;
318318
}
319319

320+
if ($new_norm eq "true"){
321+
for(my $i=0; $i<@ARGV; $i++) {
322+
if ($ARGV[$i] eq "-alt_out"){
323+
$normdir = $ARGV[$i+1];
324+
$altstats = "-alt_stats $normdir/STATS/";
325+
unless (-d "$normdir/STATS/"){
326+
`mkdir -p $normdir/STATS/`;
327+
}
328+
}
329+
}
330+
}
320331
#check for white spaces
321332
my $to_trim = "false";
322333
open(DIRS, $sample_dir);
@@ -596,7 +607,18 @@
596607
}
597608
open(LOG, ">>$logfile");
598609
print LOG "\nPORT v0.8.3-beta\n";
599-
print LOG "\n*************\n$input\n*************\n";
610+
my $default_input = $input;
611+
#$default_input = `cat $shdir/runall_normalization.sh`;
612+
$default_input =~ s/perl\ //g;
613+
$default_input =~ s/runall_normalization.pl/run_normalization/g;
614+
$default_input =~ s/\-fa\n//;
615+
$default_input =~ s/\-fq\n//;
616+
$default_input =~ s/\-sam //;
617+
$default_input =~ s/\-bam //;
618+
$default_input =~ s/\-gz//;
619+
$default_input =~ s/\-se//;
620+
print LOG "\n*************\n$default_input\n*************\n";
621+
600622
if (-e "$logdir/$study.runall_normalization.out"){
601623
`rm $logdir/$study.runall_normalization.out`;
602624

@@ -629,22 +651,90 @@
629651
my @b = split(" ", $get_name);
630652
$name = $b[@b-1];
631653
}
632-
# if resumed at runall_cat_genes_files (or runall_cat_genes_files_norm step), go back one step and start from
633-
# runall_sam2genes (or runall_sam2genes_2)
654+
# if resumed at runall_cat_genes_files (or runall_cat_genes_files_norm step), unless temp files exist,
655+
# go back one step and start from runall_sam2genes (or runall_sam2genes_2)
656+
my $cat_flag = 0;
634657
if ($name =~ /runall_cat_genes_files$/){
635-
$cat_flag++;
636-
my $tempname = $name;
637-
$tempname =~ s/runall_cat_genes_files$/runall_sam2genes_gnorm/;
638-
$name = $tempname;
639-
658+
my $TEflag = 0;
659+
my $err_name = "cat_genes.0.*.err";
660+
my $new_queue = "-mem $queue_3G";
661+
my $res = `perl $norm_script_dir/restart_failedjobs_only.pl $sample_dir $LOC \"$err_name\" \"$new_queue\" -qlist \"$qlist\"`;
662+
my $rtmp = `wc -l $resume_file`;
663+
my ($res_cnt, $res_n) = split(" ", $rtmp);
664+
open(IN, $resume_file);
665+
while(my $line = <IN>){
666+
chomp($line);
667+
my @tcnt = glob("$LOC/$line/GNORM/*/*temp*");
668+
if (@tcnt > 0){
669+
$TEflag++;
670+
}
671+
}
672+
close(IN);
673+
# print "$TEflag\t$res_cnt\n";
674+
if ($TEflag ne $res_cnt){ #temp files don't exist
675+
$cat_flag = 1;
676+
my $tempname = $name;
677+
$tempname =~ s/runall_cat_genes_files$/runall_sam2genes_gnorm/;
678+
$name = $tempname;
679+
}
640680
}
641681
if ($name =~ /runall_cat_genes_files_norm$/){
642-
$cat_flag++;
643-
my $tempname = $name;
644-
$tempname =~ s/runall_cat_genes_files_norm$/runall_sam2genes_gnorm_2/;
645-
$name = $tempname;
682+
my $TEflag = 0;
683+
my $err_name = "cat_genes.1.*.err";
684+
my $new_queue = "-mem $queue_3G";
685+
my $res = `perl $norm_script_dir/restart_failedjobs_only.pl $sample_dir $LOC \"$err_name\" \"$new_queue\" -qlist \"$qlist\"`;
686+
my $rtmp = `wc -l $resume_file`;
687+
my ($res_cnt, $res_n) =split(" ", $rtmp);
688+
open(IN, $resume_file);
689+
while(my $line = <IN>){
690+
chomp($line);
691+
my @tcnt;
692+
if ($STRANDED =~ /TRUE/i){
693+
@tcnt = glob("$normdir/GENE/FINAL_SAM/*sense/$line.*temp*");
694+
}
695+
else{
696+
@tcnt = glob("$normdir/GENE/FINAL_SAM/$line.*temp*");
697+
}
698+
if (@tcnt > 0){
699+
$TEflag++;
700+
}
701+
}
702+
close(IN);
703+
# print "$TEflag\t$res_cnt\n";
704+
if ($TEflag ne $res_cnt){ #temp files don't exist
705+
$cat_flag = 1;
706+
my $tempname = $name;
707+
$tempname =~ s/runall_cat_genes_files_norm$/runall_sam2genes_gnorm_2/;
708+
$name = $tempname;
709+
}
710+
}
711+
# if resumed at runall_parseblastout, unless blastdb files exist,
712+
# go back one step and start from runall_runblast
713+
my $blast_flag = 0;
714+
if ($name =~ /runall_parseblastout$/){
715+
my $BDBflag = 0;
716+
my $err_name = "parseblastout.*.err";
717+
my $new_queue = "-mem $queue_3G";
718+
my $res = `perl $norm_script_dir/restart_failedjobs_only.pl $sample_dir $LOC \"$err_name\" \"$new_queue\" -qlist \"$qlist\"`;
719+
my $rtmp = `wc -l $resume_file`;
720+
my ($res_cnt, $res_n) =split(" ", $rtmp);
721+
open(IN, $resume_file);
722+
while(my $line = <IN>){
723+
chomp($line);
724+
my @tcnt = glob("$LOC/$line/blastdb*");
725+
if (@tcnt > 0){
726+
$BDBflag++;
727+
}
728+
}
729+
close(IN);
730+
# print "$BDBflag\t$res_cnt\n";
731+
if ($BDBflag ne $res_cnt){ #database files don't exist
732+
$blast_flag = 1;
733+
my $tempname = $name;
734+
$tempname =~ s/runall_parseblastout$/runall_runblast/;
735+
$name = $tempname;
736+
}
646737
}
647-
648738
my @a = split(/\./, $name);
649739
$name_to_check = $a[@a-1];
650740
my $get_num = $last_step;
@@ -655,16 +745,21 @@
655745
print LOG "\nJob number not provided. Setting it to 1.\n";
656746
}
657747
else{
658-
if ($cat_flag == 1){
748+
if (($cat_flag == 1) || ($blast_flag ==1)){
659749
$res_num--;
660-
if ($name =~ /_2$/){
661-
print LOG "\nCannot resume at runall_cat_genes_files_norm.\nResuming at the previous step...\n";
662-
}
663-
else{
664-
print LOG "\nCannot resume at runall_cat_genes_files.\nResuming at the previous step...\n";
665-
}
666750
}
667751
}
752+
if ($cat_flag == 1){
753+
if ($name =~ /_2$/){
754+
print LOG "\nCannot resume at runall_cat_genes_files_norm.\nResuming at the previous step...\n";
755+
}
756+
else{
757+
print LOG "\nCannot resume at runall_cat_genes_files.\nResuming at the previous step...\n";
758+
}
759+
}
760+
if ($blast_flag == 1){
761+
print LOG "Cannot resume at runall_parseblastout.\nResuming at the previous step...\n";
762+
}
668763
$length = length($res_num) + length($name) + 3;
669764
print LOG "\nRESUME at $res_num \"$name\"\n==========";
670765
for (my $i=0; $i < $length; $i++){
@@ -673,6 +768,7 @@
673768
print LOG "\n";
674769
$run_job = "false";
675770
}
771+
676772
if ($run_prepause eq "true"){
677773
$job_num = 1;
678774
if ($run_job eq "true"){
@@ -923,6 +1019,7 @@
9231019
$c_option = "$submit \\\"$batchjobs,$jobname, $request, $queue_6G, $stat\\\"";
9241020
}
9251021
$new_queue = "-mem $queue_6G";
1022+
9261023
while(qx{$stat | wc -l} > $maxjobs){
9271024
sleep(10);
9281025
}
@@ -1041,7 +1138,7 @@
10411138
chomp($numr);
10421139
my @xnumr = split(" " , $numr);
10431140
my $maxribo = $xnumr[0];
1044-
$maxribo =~ s/\,//;
1141+
$maxribo =~ s/\,//g;
10451142
if ($maxribo > 10000000){
10461143
$new_queue = "-mem $queue_6G";
10471144
if ($maxribo > 20000000){
@@ -1750,7 +1847,7 @@
17501847
chomp($numr);
17511848
my @xnumr = split(" " , $numr);
17521849
my $maxribo = $xnumr[0];
1753-
$maxribo =~ s/\,//;
1850+
$maxribo =~ s/\,//g;
17541851
if ($maxribo > 10000000){
17551852
$new_queue = "-mem $queue_6G";
17561853
if ($maxribo > 20000000){
@@ -2598,7 +2695,7 @@
25982695
print LOG "Check \"$study_dir/STATS/EXON_INTRON_JUNCTION/percent_high_expresser_*.txt\" \nUse \"-cutoff_highexp <n>\" option to set/change the highexpresser cutoff value.\n(You may use -cutoff_highexp 100 to unfilter/keep the highexpressers.)\n\n";
25992696
}
26002697
}
2601-
2698+
=comment
26022699
$default_input = `cat $shdir/runall_normalization.sh`;
26032700
$default_input =~ s/perl\ //g;
26042701
$default_input =~ s/runall_normalization.pl/run_normalization/g;
@@ -2610,6 +2707,7 @@
26102707
$default_input =~ s/\-se//;
26112708
$default_input =~ s/\'-resume_at'\ .+\ //;
26122709
$default_input =~ s/\-resume//;
2710+
=cut
26132711
print LOG "*************\nUse \"-part2\" option to continue:\n(do not change options other than the ones listed above)\n";
26142712
#print LOG "e.g. $default_input -part2\n*************\n";
26152713
}
@@ -2619,14 +2717,6 @@
26192717
print LOG "\nERROR: \"$study.$name_to_check\" step is not in [PART1].\n\tCannot resume at \"$study.$name_to_check\" step. Please check your pipeline option and -resume_at \"<step>\" option.\n\n";
26202718
}
26212719
}
2622-
if ($new_norm eq "true"){
2623-
for(my $i=0; $i<@ARGV; $i++) {
2624-
if ($ARGV[$i] eq "-alt_out"){
2625-
$normdir = $ARGV[$i+1];
2626-
$altstats = "-alt_stats $normdir/STATS/";
2627-
}
2628-
}
2629-
}
26302720
if ($run_norm eq "true"){
26312721
if ($run_prepause eq "false"){
26322722
$job_num = 1;
@@ -3395,7 +3485,7 @@
33953485

33963486
my $mem_quants = $mem;
33973487
if ($num_samples > 200){
3398-
$mem_quants = "$request$queue_6G";
3488+
$mem_quants = "$request$queue_10G";
33993489
}
34003490
$job = "echo \"perl $norm_script_dir/quants2spreadsheet_min_max.pl $sample_dir $LOC genequants $filter_highexp $data_stranded -normdir $normdir\" | $batchjobs $mem_quants $jobname \"$study.quants2spreadsheet_gnorm\" -o $logdir/$study.quants2spreadsheet_gnorm.out -e $logdir/$study.quants2spreadsheet_gnorm.err";
34013491

@@ -3689,6 +3779,7 @@
36893779
&check_err ($name_of_job, $err_name, $job_num);
36903780
$job_num++;
36913781
}
3782+
=comment
36923783
#exon2nonexon
36933784
$name_of_job = "$study.get_exon2nonexon_stats_p2";
36943785
if (($resume eq "true")&&($run_job eq "false")){
@@ -3765,7 +3856,7 @@
37653856
$job_num++;
37663857
}
37673858
}
3768-
3859+
=cut
37693860
#predict_num_reads EIJ p2
37703861
$name_of_job = "$study.predict_num_reads_p2";
37713862
if (($resume eq "true")&&($run_job eq "false")){

0 commit comments

Comments
 (0)