From 894f915c69bebc9252eb0c9329b32c54480daeae Mon Sep 17 00:00:00 2001
From: jun-wan <jun.wang@scilifelab.se>
Date: Mon, 10 Nov 2025 10:22:35 +0100
Subject: [PATCH 1/4] Add new script to create rnaseq samplesheet

---
 create_rnaseq_samplesheet.py | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100755 create_rnaseq_samplesheet.py
diff --git a/create_rnaseq_samplesheet.py b/create_rnaseq_samplesheet.py
new file mode 100755
index 0000000..b21b7d0
--- /dev/null
+++ b/create_rnaseq_samplesheet.py
@@ -0,0 +1,36 @@
+import os
+import sys
+import glob
+
+usage="""
+    Create a samplesheet with information about the samples for a given project before running the nf-core/rnaseq analysis pipeline
+    For detailed sescription, please see: https://nf-co.re/rnaseq/usage#samplesheet-input
+
+Usage:
+        create_rnaseq_samplesheet.py <ProjectID> <Strandedness>
+    eg. create_rnaseq_samplesheet.py P001 auto >P001.csv
+
+Output:
+
+        CSV lines print to ScreenOut 
+
+"""
+
+if len(sys.argv) < 3:
+        sys.exit(usage)
+
+project = sys.argv[1]
+strandedness = sys.argv[2]	#forward/reverse/unstranded/auto
+data_path=os.path.join('/proj/ngi2016003/nobackup/NGI/DATA',project)
+header="sample,fastq_1,fastq_2,strandedness"
+sampleList=os.listdir(data_path)
+sampleList.sort()
+print(header)
+for sample in sampleList:
+	path_pattern = os.path.join(data_path, sample, '*/*/*R1*.gz')
+	paths = glob.glob(path_pattern)
+
+	for counter, R1 in enumerate(paths, 1):
+		index=str(counter)
+		R2 = R1.replace('_R1_','_R2_')
+		print(sample + ',' + R1 + ',' + R2 + ',' + strandedness)

From de49164e7c0cb0f62c16f94c469a5cd981edc2af Mon Sep 17 00:00:00 2001
From: jun-wan <jun.wang@scilifelab.se>
Date: Mon, 10 Nov 2025 13:30:36 +0100
Subject: [PATCH 2/4] Replace usage string with argparse-based help message

---
 create_rnaseq_samplesheet.py | 42 +++++++++++++++++++++++++++++-------
 1 file changed, 34 insertions(+), 8 deletions(-)

diff --git a/create_rnaseq_samplesheet.py b/create_rnaseq_samplesheet.py
index b21b7d0..d38b310 100755
--- a/create_rnaseq_samplesheet.py
+++ b/create_rnaseq_samplesheet.py
@@ -1,20 +1,46 @@
 import os
 import sys
 import glob
+import argparse
 
-usage="""
-    Create a samplesheet with information about the samples for a given project before running the nf-core/rnaseq analysis pipeline
-    For detailed sescription, please see: https://nf-co.re/rnaseq/usage#samplesheet-input
+class CustomParser(argparse.ArgumentParser):
+    def print_help(self, file=None):
+        help_text = """
 
-Usage:
-        create_rnaseq_samplesheet.py <ProjectID> <Strandedness>
-    eg. create_rnaseq_samplesheet.py P001 auto >P001.csv
+Description:
+Generate an RNA-seq sample sheet for a given project before running the nf-core/rnaseq analysis pipeline.
+The resulting CSV is printed to the screen (stdout). To save the CSV to a file, use shell redirection.
 
-Output:
+USAGE:
+  create_rnaseq_samplesheet.py <ProjectID> <Strandedness> > <output.csv>
 
-        CSV lines print to ScreenOut 
+Examples:
+  create_rnaseq_samplesheet.py P001 auto > P001.csv           # Create a sample sheet in the current folder
+  create_rnaseq_samplesheet.py P001 auto > /path/to/P001.csv  # Create a sample sheet and save to a specific location
 
+Arguments:
+  ProjectID      Identifier for your RNA-seq project (e.g., P001)
+  Strandedness   Library strandedness (forward/reverse/unstranded/auto, use 'auto' to auto-detect)
+
+Optional arguments:
+  -h, --help     Show this help message and exit
 """
+        print(help_text)
+
+def main():
+    parser = CustomParser(add_help=False)  # disable default help
+    parser.add_argument("ProjectID", help="Identifier for your RNA-seq project (e.g., P001)")
+    parser.add_argument("Strandedness", help="Library strandedness (use 'auto' to auto-detect)")
+    parser.add_argument("-h", "--help", action="help", help="show this help message and exit")
+
+    args = parser.parse_args()
+
+    # Generate CSV content
+    csv_content = f"# Sample sheet for project {args.ProjectID}\nStrandedness,{args.Strandedness}\n"
+    print(csv_content)
+
+if __name__ == "__main__":
+    main()
 
 if len(sys.argv) < 3:
         sys.exit(usage)

From 0f9b1bac8092ef77647db7bc2b1b44aa9ee21f15 Mon Sep 17 00:00:00 2001
From: jun-wan <jun.wang@scilifelab.se>
Date: Mon, 10 Nov 2025 14:29:47 +0100
Subject: [PATCH 3/4] Add -d/--data option to specify the data path

---
 create_rnaseq_samplesheet.py | 57 ++++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 25 deletions(-)
 mode change 100755 => 100644 create_rnaseq_samplesheet.py

diff --git a/create_rnaseq_samplesheet.py b/create_rnaseq_samplesheet.py
old mode 100755
new mode 100644
index d38b310..bfcf04e
--- a/create_rnaseq_samplesheet.py
+++ b/create_rnaseq_samplesheet.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python3
 import os
 import sys
 import glob
@@ -12,51 +13,57 @@ def print_help(self, file=None):
 The resulting CSV is printed to the screen (stdout). To save the CSV to a file, use shell redirection.
 
 USAGE:
-  create_rnaseq_samplesheet.py <ProjectID> <Strandedness> > <output.csv>
+  create_rnaseq_samplesheet.py <ProjectID> <Strandedness> [-d <data_path>] > <output.csv>
 
 Examples:
-  create_rnaseq_samplesheet.py P001 auto > P001.csv           # Create a sample sheet in the current folder
-  create_rnaseq_samplesheet.py P001 auto > /path/to/P001.csv  # Create a sample sheet and save to a specific location
+  create_rnaseq_samplesheet.py P001 auto > P001.csv
+  create_rnaseq_samplesheet.py P001 auto -d /my/data/path > P001.csv
 
 Arguments:
   ProjectID      Identifier for your RNA-seq project (e.g., P001)
   Strandedness   Library strandedness (forward/reverse/unstranded/auto, use 'auto' to auto-detect)
 
 Optional arguments:
+  -d, --data     Path to your RNA-seq data folder. Default: /proj/ngi2016003/nobackup/NGI/DATA
   -h, --help     Show this help message and exit
 """
         print(help_text)
 
 def main():
-    parser = CustomParser(add_help=False)  # disable default help
+    parser = CustomParser(add_help=False)
     parser.add_argument("ProjectID", help="Identifier for your RNA-seq project (e.g., P001)")
     parser.add_argument("Strandedness", help="Library strandedness (use 'auto' to auto-detect)")
-    parser.add_argument("-h", "--help", action="help", help="show this help message and exit")
+    parser.add_argument("-d", "--data", default="/proj/ngi2016003/nobackup/NGI/DATA",
+                        help="Path to RNA-seq data (default: %(default)s)")
+    parser.add_argument("-h", "--help", action="help", help="Show this help message and exit")
 
     args = parser.parse_args()
 
     # Generate CSV content
-    csv_content = f"# Sample sheet for project {args.ProjectID}\nStrandedness,{args.Strandedness}\n"
-    print(csv_content)
+    print(f"# Sample sheet for project {args.ProjectID}")
+    print(f"Strandedness,{args.Strandedness}")
+
+    # Build full path to project data
+    data_path = os.path.join(args.data, args.ProjectID)
+
+    if not os.path.exists(data_path):
+        sys.exit(f"Error: data path does not exist: {data_path}")
+
+    header = "sample,fastq_1,fastq_2,strandedness"
+    print(header)
+
+    sampleList = os.listdir(data_path)
+    sampleList.sort()
+
+    for sample in sampleList:
+        path_pattern = os.path.join(data_path, sample, '*/*/*R1*.gz')
+        paths = glob.glob(path_pattern)
+
+        for counter, R1 in enumerate(paths, 1):
+            R2 = R1.replace('_R1_','_R2_')
+            print(f"{sample},{R1},{R2},{args.Strandedness}")
+
 
 if __name__ == "__main__":
     main()
 
-if len(sys.argv) < 3:
-        sys.exit(usage)
-
-project = sys.argv[1]
-strandedness = sys.argv[2]	#forward/reverse/unstranded/auto
-data_path=os.path.join('/proj/ngi2016003/nobackup/NGI/DATA',project)
-header="sample,fastq_1,fastq_2,strandedness"
-sampleList=os.listdir(data_path)
-sampleList.sort()
-print(header)
-for sample in sampleList:
-	path_pattern = os.path.join(data_path, sample, '*/*/*R1*.gz')
-	paths = glob.glob(path_pattern)
-
-	for counter, R1 in enumerate(paths, 1):
-		index=str(counter)
-		R2 = R1.replace('_R1_','_R2_')
-		print(sample + ',' + R1 + ',' + R2 + ',' + strandedness)

From 6e1d5b715c23376fc8c7dfc001ac9ea9243d4c8c Mon Sep 17 00:00:00 2001
From: jun-wan <jun.wang@scilifelab.se>
Date: Mon, 10 Nov 2025 15:10:19 +0100
Subject: [PATCH 4/4] Search sample folders recursively to support any data path layout

---
 create_rnaseq_samplesheet.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/create_rnaseq_samplesheet.py b/create_rnaseq_samplesheet.py
index bfcf04e..9028045 100644
--- a/create_rnaseq_samplesheet.py
+++ b/create_rnaseq_samplesheet.py
@@ -56,8 +56,8 @@ def main():
     sampleList.sort()
 
     for sample in sampleList:
-        path_pattern = os.path.join(data_path, sample, '*/*/*R1*.gz')
-        paths = glob.glob(path_pattern)
+        path_pattern = os.path.join(data_path, sample, '**', '*R1*.gz')
+        paths = glob.glob(path_pattern, recursive=True)
 
         for counter, R1 in enumerate(paths, 1):
             R2 = R1.replace('_R1_','_R2_')