Normalization/template_v0.8.5.cfg at main · itmat/Normalization · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261

# config file for [PORT - RNA-Seq Normalization & Quantification] (v0.8.5e-beta)

######################################################################################################################

# 0. NORMALIZATION and DATA TYPE

# [A] Normalization Type
#      Set Normalization method of interest to "TRUE".
#      (At least one method needs to be used.)

       GENE_NORM =
       EXON_INTRON_JUNCTION_NORM =

# [B] Stranded Data
#      Set STRANDED to "TRUE" if your data are strand-specific. (Default: FALSE)

       STRANDED =

#      If STRANDED = "TRUE", you need to provide the following information.

#      Set FWD to "TRUE" if forward read is in the same orientation as the transcripts/genes.
#      Set REV to "TRUE" if reverse read is in the same orientation as the transcripts/genes.
#       -------------------------------------------------------------------------------------
#      | Note: When dUTP-based protocol (e.g. Illumina TruSeq stranded protocol) is used,    |
#      | strand information comes from reverse read.                                         |
#       -------------------------------------------------------------------------------------

        FWD =
        REV =

# [C] Chromosome Names
#
#       ---------------------------------------------------------------------------------------
#      | By default, PORT uses numbered, X, or Y (e.g. chr1,chr2,...,chrX,chrY OR 1,2,...,X,Y) |
#      | as standard chromosome names.                                                         |
#       ---------------------------------------------------------------------------------------
#
#     [C-i] File of standard chromosome [optional]
#
#     Provide a full path to file of chromosomes (CHRNAMES)
#     if your chromosome names do not follow the chromosome nomenclature described above.
#     (file with one name per line)

      CHRNAMES =

#     [C-ii] Name of mitochondrial chromosome [required]
#
#     Provide a name of mitochondrial chromosome (e.g. chrM, M).
#     If there are multiple mitochondrial chromosomes, provide a comma separated list of chromosome names.

      CHRM =


######################################################################################################################

# 1. CLUSTER INFO

#      If you're using either SGE (Sun Grid Engine) or LSF (Load Sharing Facility),
#      simply set the cluster type to "TRUE".
#      You may edit the queue names and max_jobs.

#---------------------------------------------------------------------------------------------------------------------

SGE_CLUSTER =

#SUBMIT_BATCH_JOBS_sge = qsub -cwd
#JOB_NAME_OPTION_sge = -N
#CHECK_STATUS_FULLNAME_sge = qstat -r
REQUEST_RESOURCE_OPTION_sge = -l h_vmem=
QUEUE_NAME_3G_sge = 3G
QUEUE_NAME_6G_sge = 6G
QUEUE_NAME_10G_sge = 10G
QUEUE_NAME_15G_sge = 15G
QUEUE_NAME_30G_sge = 30G
QUEUE_NAME_45G_sge = 45G
QUEUE_NAME_60G_sge = 60G
MAX_JOBS_sge = 1000

#---------------------------------------------------------------------------------------------------------------------

LSF_CLUSTER =

#SUBMIT_BATCH_JOBS_lsf = bsub
#JOB_NAME_OPTION_lsf = -J
#CHECK_STATUS_FULLNAME_lsf = bjobs -w
REQUEST_RESOURCE_OPTION_lsf = -M
QUEUE_NAME_3G_lsf = 3072
QUEUE_NAME_6G_lsf = 6144
QUEUE_NAME_10G_lsf = 10240
QUEUE_NAME_15G_lsf = 15360
QUEUE_NAME_30G_lsf = 30720
QUEUE_NAME_45G_lsf = 46080
QUEUE_NAME_60G_lsf = 61440
MAX_JOBS_lsf = 1000

#---------------------------------------------------------------------------------------------------------------------

#     If you're NOT on an SGE or LSF cluster, set OTHER_CLUSTER = "TRUE"
#     and provide the commands/options for your cluster.

OTHER_CLUSTER =

# [1] command for submitting batch jobs from current working directory (e.g. bsub, qsub -cwd)
  SUBMIT_BATCH_JOBS =
# [2] option for setting jobname for batch job submission command (e.g. -J, -N)
  JOB_NAME_OPTION =
# [3] option for requesting resources for batch job submission command (e.g. -M, -l h_vmem=)
  REQUEST_RESOURCE_OPTION =
# [4] queue name for 3G (e.g. normal, 3G)
  QUEUE_NAME_3G =
# [5] queue name for 6G (e.g. plus, 6G)
  QUEUE_NAME_6G =
# [6] queue name for 10G (e.g. max_mem30, 10G)
  QUEUE_NAME_10G =
# [7] queue name for 15G (e.g. max_mem30, 15G)
  QUEUE_NAME_15G =
# [8] queue name for 30G (e.g. max_mem30, 30G)
  QUEUE_NAME_30G =
# [9] queue name for 45G (e.g. max_mem64, 45G)
  QUEUE_NAME_45G =
# [10] queue name for 60G (e.g. max_mem64, 60G)
  QUEUE_NAME_60G =
# [11] maximum number of slots for a user
  MAX_JOBS =
# [12] command for checking batch job status followed by option to view full job name (e.g. bjobs -w, qstat -r)
  CHECK_STATUS_FULLNAME =

#---------------------------------------------------------------------------------------------------------------------

#     [OPTIONAL]
#     For clusters which only allow job submissions from the head node, specify the job submission host name below.

  HOST_NAME =

######################################################################################################################

# 2. GENE INFO

#     Provide a full path to gene information file.
#
#     * gene normalization requires an ensembl gene info file.
#
#      ensGenes files for mm9, mm10, hg19, hg38, dm3 and danRer7 are available in Normalization/norm_scripts directory:
#        mm9: /path/to/Normalization/norm_scripts/mm9_ensGenes.txt
#        mm10: /path/to/Normalization/norm_scripts/Mus_musculus.GRCm38.84.PORT_geneinfo.txt
#        hg19: /path/to/Normalization/norm_scripts/hg19_ensGenes.txt
#        hg38: /path/to/Normalization/norm_scripts/Homo_sapiens.GRCh38.84.PORT_geneinfo.txt
#        dm3: /path/to/Normalization/norm_scripts/dm3_ensGenes.txt
#        danRer7: /path/to/Normalization/norm_scripts/danRer7_ensGenes.txt

#
#     * Alternatively, you can use a perl script (/path/to/Normalization/norm_scripts/convert_gtf_to_PORT_geneinfo.transcripts.pl)
#       to convert an ENSEMBL gtf file to a gene information file.

#      -------------------------------------------------------------------------------------------------------
#     | Gene info file must contain column headers with the following suffixes:                               |
#     | name, chrom, strand, txStart, txEnd, exonStarts, exonEnds, name2, ensemblToGeneName.value, geneSymbol |
#     | (optional suffixes for annotation: description)                                                       |
#      -------------------------------------------------------------------------------------------------------

       GENE_INFO_FILE =

######################################################################################################################

# 3. FA and FAI

#     Provide a full path to fa and fai file.

# [1] genome fasta file
#      -------------------------------------------------------------------------------
#     | The description line (the header line that begins with ">") must begin with   |
#     | chromosome names that match the chromosome names in #2. GENE INFO file(s).    |
#     | Please check and modify the file appropriately.                               |
#      -------------------------------------------------------------------------------
#     UCSC genome fa files for mm9, hg19, dm3, and danRer7 are available for download (gunzip after download):
#       mm9: wget http://itmat.indexes.s3.amazonaws.com/mm9_genome_one-line-seqs.fa.gz
#       hg19: wget http://itmat.indexes.s3.amazonaws.com/hg19_genome_one-line-seqs.fa.gz
#       dm3: wget http://itmat.indexes.s3.amazonaws.com/dm3_genome_one-line-seqs.fa.gz
#       danRer7: wget http://itmat.indexes.s3.amazonaws.com/danRer7_genome_one-line-seqs.fa.gz

      GENOME_FA =

# [2] index file
#     you can get fai file using samtools (samtools faidx <ref.fa>)

      GENOME_FAI =

# [3] samtools
#     provide the location of your copy of samtools

      SAMTOOLS =

######################################################################################################################

# 4. rRNA

# [1] Set rRNA_PREFILTERED = "TRUE" if you prefiltered the ribosomal reads. Default: FALSE
#     PORT will skip the BLAST step if rRNA_PREFILTERED = TRUE

      rRNA_PREFILTERED =

# [2] rRNA sequence fasta file

#     Provide a rRNA sequence file if you want to filter out the ribosomal reads.

#     rRNA sequence files for Mammal (mm9 - can be used for all mammals), Drosophila melanogaster (dm),
#     Zebrafish (danRer) and C.elegans are available in Normalization/norm_scripts directory:
#       Mammal: /path/to/Normalization/norm_scripts/rRNA_mm9.fa
#       Drosophila melanogaster: /path/to/Normalization/norm_scripts/rRNA_dm.fa
#       Zebrafish: /path/to/Normalization/norm_scripts/rRNA_danRer.fa
#       C.elegans: /path/to/Normalization/norm_scripts/rRNA_c.elegans.fa

      rRNA_FA =

######################################################################################################################

# 5. DATA VISUALIZATION

#     Set SAM2COV = "TRUE" if you want to use sam2cov to generate coverage files. Default: FALSE
#     * sam2cov only supports reads aligned with RUM, STAR, or GSNAP

      SAM2COV =

# [1] If SAM2COV is set to TRUE, provide the location of your copy of sam2cov (full path)

      SAM2COV_LOC =

# [2] Set the aligner used to "TRUE"

      RUM =
      STAR_GSNAP =

######################################################################################################################

# 6. CLEANUP

# By default, CLEANUP step only deletes the intermediate SAM files.
# If you want to compress SAM and/or coverage files, please change the settings below.
# Note that these intermediate SAM files are necessary to run part II
# of the PORT pipeline. If they are deleted, you will NOT be able to
# re-run part II of PORT, without first running part I to regenerate
# the intermediate files.

#      This step deletes all intermediate SAM files if set to "TRUE".
#      Set DELETE_INT_SAM to "FALSE" if you wish to keep the SAM files
#      or if you plan to re-run part II of PORT multiple times. (Default : TRUE)

       DELETE_INT_SAM =

#      This converts input & final SAM files to BAM format and deletes the SAM files if set to "TRUE".
#      Set CONVERT_SAM2BAM to "FALSE" if you wish to keep the SAM files. (Default : FALSE)

       CONVERT_SAM2BAM =

#      This gzips the coverage files generated from sam2cov if set to "TRUE".
#      Set GZIP_COV to "FALSE" if you wish to keep the original coverage files. (Default : FALSE)

       GZIP_COV =

######################################################################################################################