|
317 | 317 | $cutoff_le = $cutoff_temp; |
318 | 318 | } |
319 | 319 |
|
| 320 | +if ($new_norm eq "true"){ |
| 321 | + for(my $i=0; $i<@ARGV; $i++) { |
| 322 | + if ($ARGV[$i] eq "-alt_out"){ |
| 323 | + $normdir = $ARGV[$i+1]; |
| 324 | + $altstats = "-alt_stats $normdir/STATS/"; |
| 325 | + unless (-d "$normdir/STATS/"){ |
| 326 | + `mkdir -p $normdir/STATS/`; |
| 327 | + } |
| 328 | + } |
| 329 | + } |
| 330 | +} |
320 | 331 | #check for white spaces |
321 | 332 | my $to_trim = "false"; |
322 | 333 | open(DIRS, $sample_dir); |
|
596 | 607 | } |
597 | 608 | open(LOG, ">>$logfile"); |
598 | 609 | print LOG "\nPORT v0.8.3-beta\n"; |
599 | | -print LOG "\n*************\n$input\n*************\n"; |
| 610 | +my $default_input = $input; |
| 611 | +#$default_input = `cat $shdir/runall_normalization.sh`; |
| 612 | +$default_input =~ s/perl\ //g; |
| 613 | +$default_input =~ s/runall_normalization.pl/run_normalization/g; |
| 614 | +$default_input =~ s/\-fa\n//; |
| 615 | +$default_input =~ s/\-fq\n//; |
| 616 | +$default_input =~ s/\-sam //; |
| 617 | +$default_input =~ s/\-bam //; |
| 618 | +$default_input =~ s/\-gz//; |
| 619 | +$default_input =~ s/\-se//; |
| 620 | +print LOG "\n*************\n$default_input\n*************\n"; |
| 621 | + |
600 | 622 | if (-e "$logdir/$study.runall_normalization.out"){ |
601 | 623 | `rm $logdir/$study.runall_normalization.out`; |
602 | 624 |
|
|
629 | 651 | my @b = split(" ", $get_name); |
630 | 652 | $name = $b[@b-1]; |
631 | 653 | } |
632 | | - # if resumed at runall_cat_genes_files (or runall_cat_genes_files_norm step), go back one step and start from |
633 | | - # runall_sam2genes (or runall_sam2genes_2) |
| 654 | + # if resumed at runall_cat_genes_files (or runall_cat_genes_files_norm step), unless temp files exist, |
| 655 | + # go back one step and start from runall_sam2genes (or runall_sam2genes_2) |
| 656 | + my $cat_flag = 0; |
634 | 657 | if ($name =~ /runall_cat_genes_files$/){ |
635 | | - $cat_flag++; |
636 | | - my $tempname = $name; |
637 | | - $tempname =~ s/runall_cat_genes_files$/runall_sam2genes_gnorm/; |
638 | | - $name = $tempname; |
639 | | - |
| 658 | + my $TEflag = 0; |
| 659 | + my $err_name = "cat_genes.0.*.err"; |
| 660 | + my $new_queue = "-mem $queue_3G"; |
| 661 | + my $res = `perl $norm_script_dir/restart_failedjobs_only.pl $sample_dir $LOC \"$err_name\" \"$new_queue\" -qlist \"$qlist\"`; |
| 662 | + my $rtmp = `wc -l $resume_file`; |
| 663 | + my ($res_cnt, $res_n) = split(" ", $rtmp); |
| 664 | + open(IN, $resume_file); |
| 665 | + while(my $line = <IN>){ |
| 666 | + chomp($line); |
| 667 | + my @tcnt = glob("$LOC/$line/GNORM/*/*temp*"); |
| 668 | + if (@tcnt > 0){ |
| 669 | + $TEflag++; |
| 670 | + } |
| 671 | + } |
| 672 | + close(IN); |
| 673 | +# print "$TEflag\t$res_cnt\n"; |
| 674 | + if ($TEflag ne $res_cnt){ #temp files don't exist |
| 675 | + $cat_flag = 1; |
| 676 | + my $tempname = $name; |
| 677 | + $tempname =~ s/runall_cat_genes_files$/runall_sam2genes_gnorm/; |
| 678 | + $name = $tempname; |
| 679 | + } |
640 | 680 | } |
641 | 681 | if ($name =~ /runall_cat_genes_files_norm$/){ |
642 | | - $cat_flag++; |
643 | | - my $tempname = $name; |
644 | | - $tempname =~ s/runall_cat_genes_files_norm$/runall_sam2genes_gnorm_2/; |
645 | | - $name = $tempname; |
| 682 | + my $TEflag = 0; |
| 683 | + my $err_name = "cat_genes.1.*.err"; |
| 684 | + my $new_queue = "-mem $queue_3G"; |
| 685 | + my $res = `perl $norm_script_dir/restart_failedjobs_only.pl $sample_dir $LOC \"$err_name\" \"$new_queue\" -qlist \"$qlist\"`; |
| 686 | + my $rtmp = `wc -l $resume_file`; |
| 687 | + my ($res_cnt, $res_n) =split(" ", $rtmp); |
| 688 | + open(IN, $resume_file); |
| 689 | + while(my $line = <IN>){ |
| 690 | + chomp($line); |
| 691 | + my @tcnt; |
| 692 | + if ($STRANDED =~ /TRUE/i){ |
| 693 | + @tcnt = glob("$normdir/GENE/FINAL_SAM/*sense/$line.*temp*"); |
| 694 | + } |
| 695 | + else{ |
| 696 | + @tcnt = glob("$normdir/GENE/FINAL_SAM/$line.*temp*"); |
| 697 | + } |
| 698 | + if (@tcnt > 0){ |
| 699 | + $TEflag++; |
| 700 | + } |
| 701 | + } |
| 702 | + close(IN); |
| 703 | +# print "$TEflag\t$res_cnt\n"; |
| 704 | + if ($TEflag ne $res_cnt){ #temp files don't exist |
| 705 | + $cat_flag = 1; |
| 706 | + my $tempname = $name; |
| 707 | + $tempname =~ s/runall_cat_genes_files_norm$/runall_sam2genes_gnorm_2/; |
| 708 | + $name = $tempname; |
| 709 | + } |
| 710 | + } |
| 711 | + # if resumed at runall_parseblastout, unless blastdb files exist, |
| 712 | + # go back one step and start from runall_runblast |
| 713 | + my $blast_flag = 0; |
| 714 | + if ($name =~ /runall_parseblastout$/){ |
| 715 | + my $BDBflag = 0; |
| 716 | + my $err_name = "parseblastout.*.err"; |
| 717 | + my $new_queue = "-mem $queue_3G"; |
| 718 | + my $res = `perl $norm_script_dir/restart_failedjobs_only.pl $sample_dir $LOC \"$err_name\" \"$new_queue\" -qlist \"$qlist\"`; |
| 719 | + my $rtmp = `wc -l $resume_file`; |
| 720 | + my ($res_cnt, $res_n) =split(" ", $rtmp); |
| 721 | + open(IN, $resume_file); |
| 722 | + while(my $line = <IN>){ |
| 723 | + chomp($line); |
| 724 | + my @tcnt = glob("$LOC/$line/blastdb*"); |
| 725 | + if (@tcnt > 0){ |
| 726 | + $BDBflag++; |
| 727 | + } |
| 728 | + } |
| 729 | + close(IN); |
| 730 | +# print "$BDBflag\t$res_cnt\n"; |
| 731 | + if ($BDBflag ne $res_cnt){ #database files don't exist |
| 732 | + $blast_flag = 1; |
| 733 | + my $tempname = $name; |
| 734 | + $tempname =~ s/runall_parseblastout$/runall_runblast/; |
| 735 | + $name = $tempname; |
| 736 | + } |
646 | 737 | } |
647 | | - |
648 | 738 | my @a = split(/\./, $name); |
649 | 739 | $name_to_check = $a[@a-1]; |
650 | 740 | my $get_num = $last_step; |
|
655 | 745 | print LOG "\nJob number not provided. Setting it to 1.\n"; |
656 | 746 | } |
657 | 747 | else{ |
658 | | - if ($cat_flag == 1){ |
| 748 | + if (($cat_flag == 1) || ($blast_flag ==1)){ |
659 | 749 | $res_num--; |
660 | | - if ($name =~ /_2$/){ |
661 | | - print LOG "\nCannot resume at runall_cat_genes_files_norm.\nResuming at the previous step...\n"; |
662 | | - } |
663 | | - else{ |
664 | | - print LOG "\nCannot resume at runall_cat_genes_files.\nResuming at the previous step...\n"; |
665 | | - } |
666 | 750 | } |
667 | 751 | } |
| 752 | + if ($cat_flag == 1){ |
| 753 | + if ($name =~ /_2$/){ |
| 754 | + print LOG "\nCannot resume at runall_cat_genes_files_norm.\nResuming at the previous step...\n"; |
| 755 | + } |
| 756 | + else{ |
| 757 | + print LOG "\nCannot resume at runall_cat_genes_files.\nResuming at the previous step...\n"; |
| 758 | + } |
| 759 | + } |
| 760 | + if ($blast_flag == 1){ |
| 761 | + print LOG "Cannot resume at runall_parseblastout.\nResuming at the previous step...\n"; |
| 762 | + } |
668 | 763 | $length = length($res_num) + length($name) + 3; |
669 | 764 | print LOG "\nRESUME at $res_num \"$name\"\n=========="; |
670 | 765 | for (my $i=0; $i < $length; $i++){ |
|
673 | 768 | print LOG "\n"; |
674 | 769 | $run_job = "false"; |
675 | 770 | } |
| 771 | + |
676 | 772 | if ($run_prepause eq "true"){ |
677 | 773 | $job_num = 1; |
678 | 774 | if ($run_job eq "true"){ |
|
923 | 1019 | $c_option = "$submit \\\"$batchjobs,$jobname, $request, $queue_6G, $stat\\\""; |
924 | 1020 | } |
925 | 1021 | $new_queue = "-mem $queue_6G"; |
| 1022 | + |
926 | 1023 | while(qx{$stat | wc -l} > $maxjobs){ |
927 | 1024 | sleep(10); |
928 | 1025 | } |
|
1041 | 1138 | chomp($numr); |
1042 | 1139 | my @xnumr = split(" " , $numr); |
1043 | 1140 | my $maxribo = $xnumr[0]; |
1044 | | - $maxribo =~ s/\,//; |
| 1141 | + $maxribo =~ s/\,//g; |
1045 | 1142 | if ($maxribo > 10000000){ |
1046 | 1143 | $new_queue = "-mem $queue_6G"; |
1047 | 1144 | if ($maxribo > 20000000){ |
|
1750 | 1847 | chomp($numr); |
1751 | 1848 | my @xnumr = split(" " , $numr); |
1752 | 1849 | my $maxribo = $xnumr[0]; |
1753 | | - $maxribo =~ s/\,//; |
| 1850 | + $maxribo =~ s/\,//g; |
1754 | 1851 | if ($maxribo > 10000000){ |
1755 | 1852 | $new_queue = "-mem $queue_6G"; |
1756 | 1853 | if ($maxribo > 20000000){ |
|
2598 | 2695 | print LOG "Check \"$study_dir/STATS/EXON_INTRON_JUNCTION/percent_high_expresser_*.txt\" \nUse \"-cutoff_highexp <n>\" option to set/change the highexpresser cutoff value.\n(You may use -cutoff_highexp 100 to unfilter/keep the highexpressers.)\n\n"; |
2599 | 2696 | } |
2600 | 2697 | } |
2601 | | - |
| 2698 | +=comment |
2602 | 2699 | $default_input = `cat $shdir/runall_normalization.sh`; |
2603 | 2700 | $default_input =~ s/perl\ //g; |
2604 | 2701 | $default_input =~ s/runall_normalization.pl/run_normalization/g; |
|
2610 | 2707 | $default_input =~ s/\-se//; |
2611 | 2708 | $default_input =~ s/\'-resume_at'\ .+\ //; |
2612 | 2709 | $default_input =~ s/\-resume//; |
| 2710 | +=cut |
2613 | 2711 | print LOG "*************\nUse \"-part2\" option to continue:\n(do not change options other than the ones listed above)\n"; |
2614 | 2712 | #print LOG "e.g. $default_input -part2\n*************\n"; |
2615 | 2713 | } |
|
2619 | 2717 | print LOG "\nERROR: \"$study.$name_to_check\" step is not in [PART1].\n\tCannot resume at \"$study.$name_to_check\" step. Please check your pipeline option and -resume_at \"<step>\" option.\n\n"; |
2620 | 2718 | } |
2621 | 2719 | } |
2622 | | -if ($new_norm eq "true"){ |
2623 | | - for(my $i=0; $i<@ARGV; $i++) { |
2624 | | - if ($ARGV[$i] eq "-alt_out"){ |
2625 | | - $normdir = $ARGV[$i+1]; |
2626 | | - $altstats = "-alt_stats $normdir/STATS/"; |
2627 | | - } |
2628 | | - } |
2629 | | -} |
2630 | 2720 | if ($run_norm eq "true"){ |
2631 | 2721 | if ($run_prepause eq "false"){ |
2632 | 2722 | $job_num = 1; |
|
3395 | 3485 |
|
3396 | 3486 | my $mem_quants = $mem; |
3397 | 3487 | if ($num_samples > 200){ |
3398 | | - $mem_quants = "$request$queue_6G"; |
| 3488 | + $mem_quants = "$request$queue_10G"; |
3399 | 3489 | } |
3400 | 3490 | $job = "echo \"perl $norm_script_dir/quants2spreadsheet_min_max.pl $sample_dir $LOC genequants $filter_highexp $data_stranded -normdir $normdir\" | $batchjobs $mem_quants $jobname \"$study.quants2spreadsheet_gnorm\" -o $logdir/$study.quants2spreadsheet_gnorm.out -e $logdir/$study.quants2spreadsheet_gnorm.err"; |
3401 | 3491 |
|
|
3689 | 3779 | &check_err ($name_of_job, $err_name, $job_num); |
3690 | 3780 | $job_num++; |
3691 | 3781 | } |
| 3782 | +=comment |
3692 | 3783 | #exon2nonexon |
3693 | 3784 | $name_of_job = "$study.get_exon2nonexon_stats_p2"; |
3694 | 3785 | if (($resume eq "true")&&($run_job eq "false")){ |
|
3765 | 3856 | $job_num++; |
3766 | 3857 | } |
3767 | 3858 | } |
3768 | | - |
| 3859 | +=cut |
3769 | 3860 | #predict_num_reads EIJ p2 |
3770 | 3861 | $name_of_job = "$study.predict_num_reads_p2"; |
3771 | 3862 | if (($resume eq "true")&&($run_job eq "false")){ |
|
0 commit comments