From 894f915c69bebc9252eb0c9329b32c54480daeae Mon Sep 17 00:00:00 2001 From: jun-wan Date: Mon, 10 Nov 2025 10:22:35 +0100 Subject: [PATCH 1/4] Add new script to create rnaseq samplesheet --- create_rnaseq_samplesheet.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100755 create_rnaseq_samplesheet.py diff --git a/create_rnaseq_samplesheet.py b/create_rnaseq_samplesheet.py new file mode 100755 index 0000000..b21b7d0 --- /dev/null +++ b/create_rnaseq_samplesheet.py @@ -0,0 +1,36 @@ +import os +import sys +import glob + +usage=""" + Create a samplesheet with information about the samples for a given project before running the nf-core/rnaseq analysis pipeline + For detailed sescription, please see: https://nf-co.re/rnaseq/usage#samplesheet-input + +Usage: + create_rnaseq_samplesheet.py + eg. create_rnaseq_samplesheet.py P001 auto >P001.csv + +Output: + + CSV lines print to ScreenOut + +""" + +if len(sys.argv) < 3: + sys.exit(usage) + +project = sys.argv[1] +strandedness = sys.argv[2] #forward/reverse/unstranded/auto +data_path=os.path.join('/proj/ngi2016003/nobackup/NGI/DATA',project) +header="sample,fastq_1,fastq_2,strandedness" +sampleList=os.listdir(data_path) +sampleList.sort() +print(header) +for sample in sampleList: + path_pattern = os.path.join(data_path, sample, '*/*/*R1*.gz') + paths = glob.glob(path_pattern) + + for counter, R1 in enumerate(paths, 1): + index=str(counter) + R2 = R1.replace('_R1_','_R2_') + print(sample + ',' + R1 + ',' + R2 + ',' + strandedness) From de49164e7c0cb0f62c16f94c469a5cd981edc2af Mon Sep 17 00:00:00 2001 From: jun-wan Date: Mon, 10 Nov 2025 13:30:36 +0100 Subject: [PATCH 2/4] Update with the help info. 
--- create_rnaseq_samplesheet.py | 42 +++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/create_rnaseq_samplesheet.py b/create_rnaseq_samplesheet.py index b21b7d0..d38b310 100755 --- a/create_rnaseq_samplesheet.py +++ b/create_rnaseq_samplesheet.py @@ -1,20 +1,46 @@ import os import sys import glob +import argparse -usage=""" - Create a samplesheet with information about the samples for a given project before running the nf-core/rnaseq analysis pipeline - For detailed sescription, please see: https://nf-co.re/rnaseq/usage#samplesheet-input +class CustomParser(argparse.ArgumentParser): + def print_help(self, file=None): + help_text = """ -Usage: - create_rnaseq_samplesheet.py - eg. create_rnaseq_samplesheet.py P001 auto >P001.csv +Description: +Generate an RNA-seq sample sheet for a given project before running the nf-core/rnaseq analysis pipeline. +The resulting CSV is printed to the screen (stdout). To save the CSV to a file, use shell redirection. 
-Output: +USAGE: + create_rnaseq_samplesheet.py > - CSV lines print to ScreenOut +Examples: + create_rnaseq_samplesheet.py P001 auto > P001.csv # Create a sample sheet in the current folder + create_rnaseq_samplesheet.py P001 auto > /path/to/P001.csv # Create a sample sheet and save to a specific location +Arguments: + ProjectID Identifier for your RNA-seq project (e.g., P001) + Strandedness Library strandedness (forward/reverse/unstranded/auto, use 'auto' to auto-detect) + +Optional arguments: + -h, --help Show this help message and exit """ + print(help_text) + +def main(): + parser = CustomParser(add_help=False) # disable default help + parser.add_argument("ProjectID", help="Identifier for your RNA-seq project (e.g., P001)") + parser.add_argument("Strandedness", help="Library strandedness (use 'auto' to auto-detect)") + parser.add_argument("-h", "--help", action="help", help="show this help message and exit") + + args = parser.parse_args() + + # Generate CSV content + csv_content = f"# Sample sheet for project {args.ProjectID}\nStrandedness,{args.Strandedness}\n" + print(csv_content) + +if __name__ == "__main__": + main() if len(sys.argv) < 3: sys.exit(usage) From 0f9b1bac8092ef77647db7bc2b1b44aa9ee21f15 Mon Sep 17 00:00:00 2001 From: jun-wan Date: Mon, 10 Nov 2025 14:29:47 +0100 Subject: [PATCH 3/4] Update to allow specifying the data path --- create_rnaseq_samplesheet.py | 57 ++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 25 deletions(-) mode change 100755 => 100644 create_rnaseq_samplesheet.py diff --git a/create_rnaseq_samplesheet.py b/create_rnaseq_samplesheet.py old mode 100755 new mode 100644 index d38b310..bfcf04e --- a/create_rnaseq_samplesheet.py +++ b/create_rnaseq_samplesheet.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 import os import sys import glob @@ -12,51 +13,57 @@ def print_help(self, file=None): The resulting CSV is printed to the screen (stdout). To save the CSV to a file, use shell redirection. 
USAGE: - create_rnaseq_samplesheet.py > + create_rnaseq_samplesheet.py [-d ] > Examples: - create_rnaseq_samplesheet.py P001 auto > P001.csv # Create a sample sheet in the current folder - create_rnaseq_samplesheet.py P001 auto > /path/to/P001.csv # Create a sample sheet and save to a specific location + create_rnaseq_samplesheet.py P001 auto > P001.csv + create_rnaseq_samplesheet.py P001 auto -d /my/data/path > P001.csv Arguments: ProjectID Identifier for your RNA-seq project (e.g., P001) Strandedness Library strandedness (forward/reverse/unstranded/auto, use 'auto' to auto-detect) Optional arguments: + -d, --data Path to your RNA-seq data folder. Default: /proj/ngi2016003/nobackup/NGI/DATA -h, --help Show this help message and exit """ print(help_text) def main(): - parser = CustomParser(add_help=False) # disable default help + parser = CustomParser(add_help=False) parser.add_argument("ProjectID", help="Identifier for your RNA-seq project (e.g., P001)") parser.add_argument("Strandedness", help="Library strandedness (use 'auto' to auto-detect)") - parser.add_argument("-h", "--help", action="help", help="show this help message and exit") + parser.add_argument("-d", "--data", default="/proj/ngi2016003/nobackup/NGI/DATA", + help="Path to RNA-seq data (default: %(default)s)") + parser.add_argument("-h", "--help", action="help", help="Show this help message and exit") args = parser.parse_args() # Generate CSV content - csv_content = f"# Sample sheet for project {args.ProjectID}\nStrandedness,{args.Strandedness}\n" - print(csv_content) + print(f"# Sample sheet for project {args.ProjectID}") + print(f"Strandedness,{args.Strandedness}") + + # Build full path to project data + data_path = os.path.join(args.data, args.ProjectID) + + if not os.path.exists(data_path): + sys.exit(f"Error: data path does not exist: {data_path}") + + header = "sample,fastq_1,fastq_2,strandedness" + print(header) + + sampleList = os.listdir(data_path) + sampleList.sort() + + for sample in 
sampleList: + path_pattern = os.path.join(data_path, sample, '*/*/*R1*.gz') + paths = glob.glob(path_pattern) + + for counter, R1 in enumerate(paths, 1): + R2 = R1.replace('_R1_','_R2_') + print(f"{sample},{R1},{R2},{args.Strandedness}") + if __name__ == "__main__": main() -if len(sys.argv) < 3: - sys.exit(usage) - -project = sys.argv[1] -strandedness = sys.argv[2] #forward/reverse/unstranded/auto -data_path=os.path.join('/proj/ngi2016003/nobackup/NGI/DATA',project) -header="sample,fastq_1,fastq_2,strandedness" -sampleList=os.listdir(data_path) -sampleList.sort() -print(header) -for sample in sampleList: - path_pattern = os.path.join(data_path, sample, '*/*/*R1*.gz') - paths = glob.glob(path_pattern) - - for counter, R1 in enumerate(paths, 1): - index=str(counter) - R2 = R1.replace('_R1_','_R2_') - print(sample + ',' + R1 + ',' + R2 + ',' + strandedness) From 6e1d5b715c23376fc8c7dfc001ac9ea9243d4c8c Mon Sep 17 00:00:00 2001 From: jun-wan Date: Mon, 10 Nov 2025 15:10:19 +0100 Subject: [PATCH 4/4] Update to find R1 files at any depth under the data path --- create_rnaseq_samplesheet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/create_rnaseq_samplesheet.py b/create_rnaseq_samplesheet.py index bfcf04e..9028045 100644 --- a/create_rnaseq_samplesheet.py +++ b/create_rnaseq_samplesheet.py @@ -56,8 +56,8 @@ def main(): sampleList.sort() for sample in sampleList: - path_pattern = os.path.join(data_path, sample, '*/*/*R1*.gz') - paths = glob.glob(path_pattern) + path_pattern = os.path.join(data_path, sample, '**', '*R1*.gz') + paths = glob.glob(path_pattern, recursive=True) for counter, R1 in enumerate(paths, 1): R2 = R1.replace('_R1_','_R2_')