UCSF-DSCOLAB · erflynn · Jun 27, 2025 · Jun 27, 2025 · Jun 27, 2025 · Jul 1, 2025
diff --git a/bulk_RNASeq/config/base.config b/bulk_RNASeq/config/base.config
@@ -10,6 +10,7 @@ profiles {
         process.executor = 'slurm'
 	    executor.queueSize = 60
         process.cache = 'lenient'
+	   process.scratch = true
 	    trace.enabled = true
         trace.taskMemory = true
         withLabel: 'per_sample' {
@@ -23,6 +24,7 @@ profiles {
         process.cache = 'lenient'
         process.executor = 'sge'
         process.penv = 'smp'
+	   process.scratch = true
         clusterOptions = '-S /bin/bash'
         withLabel: 'per_sample' {
             errorStrategy = 'finish'

diff --git a/bulk_RNASeq/modules/bcftools_sort_vcf.nf b/bulk_RNASeq/modules/bcftools_sort_vcf.nf
@@ -8,6 +8,7 @@ process BCFTOOLS_SORT_VCF {
         fileSize = vcf.size() / (1024 * 1024 * 1024)
         return 1.GB + (1.GB * fileSize * 0.01)
     }
+    containerOptions "-B /scratch/"
 
     input:
     tuple val(meta), path(vcf)
@@ -24,7 +25,7 @@ process BCFTOOLS_SORT_VCF {
     """
     bcftools sort \\
             --output ${prefix}.sorted.vcf.gz -Oz \\
-            --temp-dir \$PWD \\
+            --temp-dir \$TMPDIR/ \\
             $vcf
     """
 }
diff --git a/bulk_RNASeq/modules/fastp_trim_adapters.nf b/bulk_RNASeq/modules/fastp_trim_adapters.nf
@@ -15,6 +15,7 @@ process FASTP_TRIM_ADAPTERS {
 	}
         return 10.GB * (1 + (fileSize * 2))
     }
+    publishDir "${params.results_directory}/trimmed_reads", mode: 'copy'
 
     input:
     tuple val(meta), path(reads)

diff --git a/bulk_RNASeq/modules/gatk4_apply_bqsr.nf b/bulk_RNASeq/modules/gatk4_apply_bqsr.nf
@@ -8,6 +8,8 @@ process GATK4_APPLY_BQSR {
         return 1.GB + (2.GB * fileSize * 0.1)
     }
 
+    containerOptions "-B /scratch/"
+
     input:
     tuple val(meta), path(input), path(input_index), path(bqsr_table)
     path  genome
@@ -30,7 +32,7 @@ process GATK4_APPLY_BQSR {
         --output ${prefix}_bqsr.bam \\
         --reference $genome \\
         --bqsr-recal-file $bqsr_table \\
-        --tmp-dir \$PWD \\
+        --tmp-dir \$TMPDIR \\
         $args
     """
 }
diff --git a/bulk_RNASeq/modules/gatk4_haplotype_caller.nf b/bulk_RNASeq/modules/gatk4_haplotype_caller.nf
@@ -9,6 +9,9 @@ process GATK4_HAPLOTYPECALLER {
         return 17.GB + (1.GB * fileSize * 3)
     }
 
+    containerOptions "-B /scratch/"
+
+
     input:
     tuple val(meta), path(input), path(input_index)
     path  fasta
@@ -41,7 +44,7 @@ process GATK4_HAPLOTYPECALLER {
         --output ${prefix}.vcf.gz \\
         $reference_command \\
         $dbsnp_command \\
-        --tmp-dir \$PWD \\
+        --tmp-dir \$TMPDIR \\
         $soft_clipped \\
         $min_conf \\
         $min_pruning \\

diff --git a/bulk_RNASeq/modules/gatk4_recalibrator.nf b/bulk_RNASeq/modules/gatk4_recalibrator.nf
@@ -7,6 +7,8 @@ process GATK4_BASE_RECALIBRATOR {
         fileSize = input.size() / (1024 * 1024 * 1024)
         return 5.GB + (1.GB * fileSize * 0.1)
     }
+    containerOptions "-B /scratch/"
+
 
     input:
     tuple val(meta), path(input)
@@ -34,7 +36,7 @@ process GATK4_BASE_RECALIBRATOR {
         --output ${prefix}.table \\
         --reference $fasta \\
         $sites_command \\
-        --tmp-dir \$PWD \\
+        --tmp-dir \$TMPDIR \\
         $args
     """
 }
diff --git a/bulk_RNASeq/modules/gatk4_splitncigar.nf b/bulk_RNASeq/modules/gatk4_splitncigar.nf
@@ -7,6 +7,7 @@ process GATK4_SPLITNCIGARREADS {
         fileSize = bam.size() / (1024 * 1024 * 1024)
         return 200.GB + (1.GB * fileSize * 5)
     }
+    containerOptions "-B /scratch/"
 
     input:
     tuple val(meta), path(bam), path(bai)
@@ -30,7 +31,7 @@ process GATK4_SPLITNCIGARREADS {
         --input $bam \\
         --output ${prefix}.bam \\
         --reference $genome \\
-        --tmp-dir \$PWD \\
+        --tmp-dir \$TMPDIR \\
         $args
     """
 }
diff --git a/bulk_RNASeq/modules/gatk4_variant_filter.nf b/bulk_RNASeq/modules/gatk4_variant_filter.nf
@@ -9,6 +9,8 @@ process GATK4_VARIANTFILTRATION {
         return 5.GB + (1.GB * fileSize)
     }
 
+    containerOptions "-B /scratch/"
+
     input:
     tuple val(meta), path(vcf), path(tbi)
     path  fasta
@@ -34,7 +36,7 @@ process GATK4_VARIANTFILTRATION {
         --reference $fasta \\
         --window $params.gatk_vf_window_size \\
         --output ${prefix}.filtered.vcf.gz \\
-        --tmp-dir \$PWD \\
+        --tmp-dir \$TMPDIR \\
         $args
     """
 }
diff --git a/bulk_RNASeq/modules/kallisto_quant.nf b/bulk_RNASeq/modules/kallisto_quant.nf
@@ -10,7 +10,11 @@ process KALLISTO_QUANT {
           // File size in GB
           fileSize = reads[0].size() / (1024 * 1024 * 1024)
         }
-	return 7.GB * (1 + (fileSize*0.25))
+	if (fileSize < 15){
+	   return 10.GB
+	} else {
+           return 7.GB * (1 + (fileSize*0.25))
+	}
     }
     publishDir "${params.results_directory}/kallisto", mode: 'copy'
 

diff --git a/bulk_RNASeq/modules/samtools_bam_to_cram.nf b/bulk_RNASeq/modules/samtools_bam_to_cram.nf
@@ -4,9 +4,9 @@ process SAMTOOLS_BAM_TO_CRAM {
     label 'samtools_bam_to_cram'
     publishDir "${params.results_directory}/star", mode: 'copy'
     memory {
-        // File size in GB
-        fileSize = bam.size() / (1024 * 1024 * 1024)
-        return 1.GB + (1.GB * fileSize * 0.001)
+        def sizeGiB   = Math.ceil( bam.size() / (1024 ** 3) )
+        def required  = 1.GB + sizeGiB * 1.GB
+        return required < 8.GB ? 8.GB : required   // equivalent to Math.max(required, 8.GB)
     }
 
     input:

diff --git a/bulk_RNASeq/modules/sortmerna_rrna_removal.nf b/bulk_RNASeq/modules/sortmerna_rrna_removal.nf
@@ -15,6 +15,9 @@ process SORTMERNA_RIBOSOMAL_RNA_REMOVAL {
         }
         return 15.GB * (1 + (fileSize * 0.1))
     }
+    publishDir "${params.results_directory}/trimmed_cleaned_reads", mode: 'copy'
+
+    containerOptions "-B /scratch/"
 
     input:
     tuple val(meta), path(reads)
@@ -37,7 +40,7 @@ process SORTMERNA_RIBOSOMAL_RNA_REMOVAL {
             --ref $refs \\
             --reads $reads \\
             --threads $task.cpus \\
-            --workdir . \\
+            --workdir \$TMPDIR/ \\
             --aligned rRNA_reads \\
             --fastx \\
             --other non_rRNA_reads \\
@@ -53,7 +56,7 @@ process SORTMERNA_RIBOSOMAL_RNA_REMOVAL {
             --reads ${reads[0]} \\
             --reads ${reads[1]} \\
             --threads $task.cpus \\
-            --workdir . \\
+            --workdir \$TMPDIR/ \\
             --aligned rRNA_reads \\
             --fastx \\
             --other non_rRNA_reads \\

diff --git a/bulk_RNASeq/modules/star_align.nf b/bulk_RNASeq/modules/star_align.nf
@@ -3,15 +3,18 @@ process STAR_ALIGN {
     // clusterOptions = '-S /bin/bash'
     label 'star_align', 'per_sample'
     memory {
-        if (meta.single_end) {
-          // File size in GB
-          fileSize = reads.size() / (1024 * 1024 * 1024)
-        } else {
-          // File size in GB
-          fileSize = reads[0].size() / (1024 * 1024 * 1024)
-        }
-	return 25.GB * (2 + (fileSize*0.1))
+      /* Size of the reads in GiB */
+      def fileSizeGB = meta.single_end
+                      ? (reads.size() / (1024 ** 3))
+                      : (reads[0].size()    / (1024 ** 3))
+
+      /* Formula-based requirement: 25 GB × ( 2 + 0.1 × size ) */
+      def required = 25.GB * ( 2 + (fileSizeGB * 0.1) )
+
+      /* Never ask for less than 25 GB */
+      return [required, 25.GB].max()          // i.e. the larger of required and 25.GB
     }
+    containerOptions "-B /scratch/"
     publishDir "${params.results_directory}/star", mode: 'copy', pattern: "${prefix}ReadsPerGene.out.tab"
     publishDir "${params.results_directory}/star", mode: 'copy', pattern: "${prefix}Log.final.out"
 
@@ -44,6 +47,7 @@ process STAR_ALIGN {
 	      --outSAMunmapped Within KeepPairs \
         --outSAMattrRGline ID:$prefix SM:$prefix LB:library PL:illumina \
         --outFileNamePrefix $prefix \\
+        --outTmpDir $task.scratch \\
         --outFilterMismatchNoverLmax ${params.star_outfilter_mismatch_n_over_lmax} \\
         --alignSJoverhangMin ${params.star_align_sjoverhang_min} \\
         --outFilterMultimapNmax ${params.star_outfilter_multimap_nmax} \\

diff --git a/bulk_RNASeq/run_pipeline_c4.sh b/bulk_RNASeq/run_pipeline_c4.sh
@@ -7,7 +7,9 @@
 #SBATCH --exclude=c4-n20
 
 # to run:
-#   sbatch ./run_pipeline.sh -profile hpc
+#   sbatch ./run_pipeline_c4.sh
+# to resume:
+#   sbatch ./run_pipeline_c4.sh -resume
 
 # Arugments:
 # pass as many additional arguments to nextflow as you'd like (e.g. -resume, -with-timeline, -profile test)
-Original file line number
+Diff line change
@@ Expand Up / @@ -15,6 +15,7 @@ process FASTP_TRIM_ADAPTERS { @@
     	}
             return 10.GB * (1 + (fileSize * 2))
         }
+        publishDir "${params.results_directory}/trimmed_reads", mode: 'copy'
         input:
         tuple val(meta), path(reads)
@@ Expand Down @@