-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathRead_Mapping.sh
More file actions
116 lines (109 loc) · 3.83 KB
/
Read_Mapping.sh
File metadata and controls
116 lines (109 loc) · 3.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/bin/bash
set -o pipefail
# Get input file (from directory or list)
if [[ -d "$RM_INPUT" ]]; then #if input is a DIRECTORY
f1=$(find $RM_INPUT -maxdepth 1 -name "*$FORWARD" | sed -n ${PBS_ARRAYID}p)
elif [[ -f "$RM_INPUT" ]]; then #if input is a FILE
f1=$(sed -n ${PBS_ARRAYID}p $RM_INPUT)
else
echo "Please specify a valid directory or list of files in the config"
fi
name=$(basename ${f1%%$FORWARD}"") #Name of forward sample
### Get reverse read if paired-end, make sure files are valid
if [[ "$PE" == "True" ]]; then
f2=${f1%%$FORWARD}"$REVERSE"
if [[ -f $f1 && -f $f2 ]]; then
echo "Mapping PE reads for sample $name"
else
echo "$f1 and $f2 are not both valid files"
exit 1
fi
else
f2=""
if [[ -f $f1 ]]; then
echo "Mapping SE reads for sample $name"
else
echo "$f1 is not a valid file"
exit 1
fi
fi
### Obtain Sample Name + Lane # to add read group information to mapped files
LANE_NUM=$(grep -o "L00[1-4]" <<< $name | cut -c 4) #Obtain Lane #
SAMPLE_NAME=${name%%[!0-9]*}
ID="${SAMPLE_NAME}:${FLOWCELL_NAME}.${LANE_NUM}"
echo "File name indicates the sample name is ${SAMPLE_NAME} and the lane number is ${LANE_NUM}"
echo "The read group ID field will be ${ID}"
###Genomic Coordinate Output
if [[ "$GENOMIC_COORDINATE_BAMSORTED" == "yes" ]]; then #If genomic alignments should be output as sorted BAM files
FORMAT="BAM SortedByCoordinate"
echo "Output Genomic Alignments will be sorted BAM files"
else
FORMAT="SAM"
echo "Output Genomic Alignments will be unsorted SAM files"
fi
###STAR mapping
if [[ "$RM_PASS" == "first" ]]; then ###first pass mode
echo "In first pass Mode"
${STAR_FILE} \
--runThreadN $RM_NTHREAD \
--genomeDir $GEN_DIR \
--readFilesIn $f1 $f2 \
--readFilesCommand gunzip -c \
--seedSearchStartLmax $SEEDSEARCH \
--outFileNamePrefix $CJ_OUTPUTDIR/"$name" \
--outFilterMismatchNmax $MAX_MIS \
--outFilterMultimapNmax $MAX_N \
--outFilterScoreMinOverLread $MINSCORE_READL \
--outFilterMatchNminOverLread $MINMATCH_READL \
--outReadsUnmapped $UNMAP_F \
--outSAMtype SAM \
--quantMode - \
--outSAMattrRGline ID:${ID} LB:${SAMPLE_NAME} PL:${PLATFORM} SM:${SAMPLE_NAME} PU:${ID} \
--outFilterType BySJout \
--outSJfilterReads Unique ## could change later to be a filtering step
elif [[ "$RM_PASS" == "second" ]]; then ###second pass mode
if [[ ! -z "$JUNCTIONS" ]]; then ### if using junctions
echo "In second pass mode using $NUM_JUNCTIONS junction files"
echo "Junctions are as follows: $JUNCTIONS"
${STAR_FILE} \
--runThreadN $RM_NTHREAD \
--genomeDir $GEN_DIR \
--readFilesIn $f1 $f2 \
--readFilesCommand gunzip -c \
--seedSearchStartLmax $SEEDSEARCH \
--outFileNamePrefix $RM_OUTPUTDIR/"$name" \
--outFilterMismatchNmax $MAX_MIS \
--outFilterMultimapNmax $MAX_N \
--outFilterScoreMinOverLread $MINSCORE_READL \
--outFilterMatchNminOverLread $MINMATCH_READL \
--outReadsUnmapped $UNMAP_F \
--outSAMtype $FORMAT \
--outSAMattributes NH HI AS nM NM MD \
--quantMode $QUANT \
--outSAMattrRGline ID:${ID} LB:${SAMPLE_NAME} PL:${PLATFORM} SM:${SAMPLE_NAME} PU:${ID} \
--outFilterType BySJout \
--sjdbFileChrStartEnd $JUNCTIONS
else
echo "Mapping without incorporating un-annotated junctions"
${STAR_FILE} \
--runThreadN $RM_NTHREAD \
--genomeDir $GEN_DIR \
--readFilesIn $f1 $f2 \
--readFilesCommand gunzip -c \
--seedSearchStartLmax $SEEDSEARCH \
--outFileNamePrefix $RM_OUTPUTDIR/"$name" \
--outFilterMismatchNmax $MAX_MIS \
--outFilterMultimapNmax $MAX_N \
--outFilterScoreMinOverLread $MINSCORE_READL \
--outFilterMatchNminOverLread $MINMATCH_READL \
--outReadsUnmapped $UNMAP_F \
--outSAMtype $FORMAT \
--outSAMattributes NH HI AS nM NM MD \
--quantMode $QUANT \
--outSAMattrRGline ID:${ID} LB:${SAMPLE_NAME} PL:${PLATFORM} SM:${SAMPLE_NAME} PU:${ID} \
--outFilterType BySJout
fi
else
echo "Error: Unsure of whether first or second pass mode, exiting..."
exit 1
fi