forked from BimberLab/DiscvrLabKeyModules
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathSequenceAnalysis-11.20-11.30.sql
More file actions
306 lines (237 loc) · 8.14 KB
/
SequenceAnalysis-11.20-11.30.sql
File metadata and controls
306 lines (237 loc) · 8.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
/*
* Copyright (c) 2012 LabKey Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* SequenceAnalysis-11.20-11.21.sql */
-- NOTE: due to a naming problem, this script probably never ran on earlier installs
-- alter table sequenceanalysis.virus_strains
-- drop column workbook;
-- alter table sequenceanalysis.samples
-- drop column workbook;
--moving to laboratory module
drop table if exists sequenceanalysis.species;
drop table if exists sequenceanalysis.sample_source;
drop table if exists sequenceanalysis.dna_mol_type;
drop table if exists sequenceanalysis.external_dbs;
/* SequenceAnalysis-11.21-11.22.sql */
alter table sequenceAnalysis.site_module_properties
drop column container
;
DROP TABLE if exists sequenceAnalysis.chemistries;
DROP TABLE IF EXISTS sequenceanalysis.sequence_platforms;
CREATE TABLE sequenceanalysis.sequence_platforms (
platform varchar(45) NOT NULL,
aliases varchar(200),
CONSTRAINT PK_sequence_platforms PRIMARY KEY (platform)
)
WITH (OIDS=FALSE)
;
-- ----------------------------
-- Records of sequenceAnalysis.sequence_platforms
-- ----------------------------
-- @SkipOnEmptySchemasBegin
INSERT INTO sequenceanalysis.sequence_platforms
(platform,aliases)
VALUES
('ILLUMINA', 'SLX,SOLEXA'),
('SOLID', null),
('LS454', '454'),
('COMPLETE_GENOMICS', 'COMPLETE'),
('PACBIO', null),
('ION_TORRENT', 'IONTORRENT'),
('SANGER', null)
;
-- @SkipOnEmptySchemasEnd
update sequenceAnalysis.sequence_reads set chemistry = 'LS454' where chemistry = 'Pyrosequencing';
delete from sequenceAnalysis.site_module_properties where prop_name = 'contactEmail';
-- @SkipOnEmptySchemasBegin
insert into sequenceAnalysis.site_module_properties (prop_name, stringValue) VALUES ('contactEmail', 'bbimber@labkey.com');
-- @SkipOnEmptySchemasEnd
DROP TABLE IF EXISTS sequenceAnalysis.sequence_readsets;
CREATE TABLE sequenceAnalysis.sequence_readsets (
RowId serial NOT NULL,
name varchar(220),
subjectid integer,
sampleid integer,
platform varchar(100),
comments text default null,
Container ENTITYID NOT NULL,
CreatedBy USERID,
Created TIMESTAMP,
ModifiedBy USERID,
Modified TIMESTAMP,
CONSTRAINT PK_sequence_readsets PRIMARY KEY (rowId)
);
alter table sequenceAnalysis.sequence_analyses
add column readset integer
;
ALTER TABLE sequenceAnalysis.sequence_reads
add column readset integer
;
--populate readsets based on sequence_reads
insert into sequenceAnalysis.sequence_readsets
(sampleid,container,created,createdby,modified,modifiedby,platform)
(select a.sampleid,container,max(created) as created,max(createdby) as createdby, max(modified) as modified,max(modifiedby) as modifiedby, 'LS454' as platform
FROM sequenceAnalysis.sequence_analyses a
GROUP BY a.sampleid, a.container
);
--then update sequence_reads based on readsets
UPDATE sequenceAnalysis.sequence_reads s
SET readset = (
select rs.rowid
from sequenceAnalysis.sequence_readsets rs
join sequenceAnalysis.sequence_analyses a
on (a.sampleid=rs.sampleid and a.container=rs.container)
WHERE s.analysis_id=a.rowid and s.container=a.container);
/* SequenceAnalysis-11.22-11.23.sql */
--moved to the laboratory module:
drop table if exists sequenceanalysis.species;
drop table if exists sequenceanalysis.sample_source;
drop table if exists sequenceanalysis.external_dbs;
drop table if exists sequenceanalysis.geographic_origins;
drop table if exists sequenceanalysis.dna_mol_type;
--drop table if exists sequenceanalysis.samples;
alter table sequenceAnalysis.sequence_readsets
add column fileid integer
;
/* SequenceAnalysis-11.23-11.24.sql */
alter table sequenceAnalysis.nt_snps
add column quality_score double precision,
add column avg_qual double precision
;
alter table sequenceAnalysis.aa_snps
add column avg_qual double precision
;
alter table sequenceAnalysis.virus_strains
drop column workbook
;
alter table sequenceAnalysis.sequence_analyses
add column alignmentFile integer,
add column snpFile integer
;
/* SequenceAnalysis-11.24-11.25.sql */
alter table sequenceanalysis.sequence_readsets
drop column subjectid,
add column subjectid varchar(200)
;
alter table sequenceanalysis.ref_nt_sequences
add column aliases varchar(1000)
;
/* SequenceAnalysis-11.25-11.26.sql */
alter table sequenceanalysis.sequence_alignments
drop column haplotype
;
alter table sequenceanalysis.samples
drop column workbook
;
alter table sequenceanalysis.sequence_analyses
drop column sampleid
;
alter table sequenceanalysis.haplotype_types
drop column container
;
alter table sequenceanalysis.haplotype_types
drop column created
;
alter table sequenceanalysis.haplotype_types
drop column createdby
;
alter table sequenceanalysis.haplotype_types
drop column modified
;
alter table sequenceanalysis.haplotype_types
drop column modifiedby
;
alter table sequenceanalysis.haplotype_sequences
drop column container
;
drop table if exists sequenceanalysis.haplotype_mapping;
drop table if exists sequenceanalysis.haplotype_definitions;
drop table if exists sequenceanalysis.haplotypes;
CREATE TABLE sequenceanalysis.haplotypes (
name varchar(200) NOT NULL,
type varchar(200),
comment text,
CreatedBy USERID,
Created timestamp,
ModifiedBy USERID,
Modified timestamp,
CONSTRAINT PK_haplotypes PRIMARY KEY (name)
);
alter table sequenceanalysis.ref_nt_sequences
rename column category4 to lineage;
alter table sequenceanalysis.ref_nt_sequences
rename column category3 to locus;
alter table sequenceanalysis.ref_nt_sequences
rename column category2 to subset;
alter table sequenceanalysis.ref_nt_sequences
rename column category1 to category;
/* SequenceAnalysis-11.26-11.27.sql */
ALTER TABLE sequenceAnalysis.sequence_coverage
ADD COLUMN avgqual_a double precision,
ADD COLUMN avgqual_t double precision,
ADD COLUMN avgqual_g double precision,
ADD COLUMN avgqual_c double precision,
ADD COLUMN avgqual_n double precision,
ADD COLUMN avgqual_del double precision
;
ALTER TABLE sequenceAnalysis.nt_snps
ADD pvalue double precision
;
ALTER TABLE sequenceAnalysis.aa_snps
ADD column min_pvalue double precision
;
ALTER TABLE sequenceAnalysis.sequence_analyses
add column reference_library integer
;
ALTER TABLE sequenceAnalysis.sequence_readsets
add column barcode5 varchar(100)
;
ALTER TABLE sequenceAnalysis.sequence_readsets
add column barcode3 varchar(100)
;
CREATE INDEX aa_snps_ref_aa_position_codon
ON sequenceanalysis.aa_snps (analysis_id, ref_nt_id, ref_aa_id, ref_aa_position, ref_aa_insert_index, ref_aa, q_aa, q_codon);
/* SequenceAnalysis-11.27-11.28.sql */
ALTER TABLE sequenceAnalysis.sequence_coverage
ADD COLUMN pvalue_a double precision,
ADD COLUMN pvalue_t double precision,
ADD COLUMN pvalue_g double precision,
ADD COLUMN pvalue_c double precision,
ADD COLUMN pvalue_n double precision,
ADD COLUMN pvalue_del double precision
;
ALTER TABLE sequenceAnalysis.sequence_readsets
add column raw_input_file integer
;
/* SequenceAnalysis-11.28-11.29.sql */
ALTER TABLE sequenceanalysis.sequence_alignments
ALTER COLUMN read_id DROP NOT NULL;
ALTER TABLE sequenceanalysis.aa_snps
ALTER COLUMN adj_percent TYPE double precision;
ALTER TABLE sequenceanalysis.aa_snps
ALTER COLUMN raw_percent TYPE double precision;
ALTER TABLE sequenceanalysis.aa_snps
ALTER COLUMN adj_depth TYPE double precision;
ALTER TABLE sequenceanalysis.aa_snps
ALTER COLUMN raw_depth TYPE double precision;
ALTER TABLE sequenceanalysis.aa_snps
ALTER COLUMN adj_reads TYPE double precision;
ALTER TABLE sequenceanalysis.aa_snps
ALTER COLUMN raw_reads TYPE double precision;
/* SequenceAnalysis-11.29-11.30.sql */
ALTER TABLE sequenceanalysis.sequence_readsets
ADD COLUMN qc_file integer;
ALTER TABLE sequenceanalysis.sequence_analyses
ADD COLUMN qc_file integer;