Skip to content

Commit 21d334a

Browse files
authored
Merge pull request #419 from LabKey/fb_merge_25.11_to_develop
Merge discvr-25.11 to develop
2 parents 0cf5147 + b259c8d commit 21d334a

File tree

45 files changed

+728
-1311
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+728
-1311
lines changed

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/RefNtSequenceModel.java

Lines changed: 32 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import htsjdk.samtools.util.StringUtil;
1919
import org.apache.commons.io.IOUtils;
2020
import org.apache.logging.log4j.Logger;
21-
import org.apache.logging.log4j.LogManager;
2221
import org.jetbrains.annotations.Nullable;
2322
import org.labkey.api.data.Container;
2423
import org.labkey.api.data.ContainerManager;
@@ -32,8 +31,11 @@
3231
import org.labkey.api.exp.api.ExpData;
3332
import org.labkey.api.exp.api.ExperimentService;
3433
import org.labkey.api.files.FileContentService;
34+
import org.labkey.api.security.Crypt;
3535
import org.labkey.api.security.User;
36+
import org.labkey.api.util.FileUtil;
3637
import org.labkey.api.util.MemTracker;
38+
import org.labkey.api.util.logging.LogHelper;
3739
import org.labkey.api.writer.PrintWriters;
3840

3941
import java.io.File;
@@ -55,7 +57,9 @@
5557
*/
5658
public class RefNtSequenceModel implements Serializable
5759
{
58-
private static final Logger _log = LogManager.getLogger(RefNtSequenceModel.class);
60+
private static final Logger _log = LogHelper.getLogger(RefNtSequenceModel.class, "Messages related to Reference NT Sequences");
61+
62+
public static String BASE_DIRNAME = ".sequences";
5963

6064
private int _rowid;
6165
private String _name;
@@ -414,7 +418,7 @@ public byte[] getSequenceBases()
414418

415419
public void createFileForSequence(User u, String sequence, @Nullable File outDir) throws IOException
416420
{
417-
File output = getExpectedSequenceFile(outDir);
421+
File output = getExpectedSequenceFile();
418422
if (output.exists())
419423
{
420424
output.delete();
@@ -439,9 +443,9 @@ public void createFileForSequence(User u, String sequence, @Nullable File outDir
439443
Table.update(u, ti, this, _rowid);
440444
}
441445

442-
private File getExpectedSequenceFile(@Nullable File outDir) throws IllegalArgumentException
446+
public File getExpectedSequenceFile() throws IllegalArgumentException
443447
{
444-
return new File(getSequenceDir(true, outDir), _rowid + ".txt.gz");
448+
return FileUtil.appendName(getHashedDir(true), _rowid + ".txt.gz");
445449
}
446450

447451
private Container getLabKeyContainer()
@@ -455,33 +459,17 @@ private Container getLabKeyContainer()
455459
return c;
456460
}
457461

458-
private File getSequenceDir(boolean create, @Nullable File outDir) throws IllegalArgumentException
462+
private File getBaseSequenceDir() throws IllegalArgumentException
459463
{
460464
Container c = getLabKeyContainer();
461-
File ret = outDir == null ? getReferenceSequenceDir(c) : outDir;
462-
if (create && !ret.exists())
463-
{
464-
ret.mkdirs();
465-
}
466-
467-
return ret;
468-
}
469-
470-
private File getReferenceSequenceDir(Container c) throws IllegalArgumentException
471-
{
472465
FileContentService fileService = FileContentService.get();
473466
File root = fileService == null ? null : fileService.getFileRoot(c, FileContentService.ContentType.files);
474467
if (root == null)
475468
{
476469
throw new IllegalArgumentException("File root not defined for container: " + c.getPath());
477470
}
478471

479-
return new File(root, ".sequences");
480-
}
481-
482-
public void writeSequence(Writer writer, int lineLength) throws IOException
483-
{
484-
writeSequence(writer, lineLength, null, null);
472+
return FileUtil.appendName(root, BASE_DIRNAME);
485473
}
486474

487475
public void writeSequence(Writer writer, int lineLength, Integer start, Integer end) throws IOException
@@ -562,6 +550,26 @@ public File getOffsetsFile()
562550
return null;
563551
}
564552

565-
return new File(d.getFile().getParentFile(), getRowid() + "_offsets.txt");
553+
return FileUtil.appendName(d.getFile().getParentFile(), getRowid() + "_offsets.txt");
554+
}
555+
556+
private File getHashedDir(boolean create)
557+
{
558+
File baseDir = getBaseSequenceDir();
559+
String digest = Crypt.MD5.digest(String.valueOf(getRowid()));
560+
561+
baseDir = FileUtil.appendName(baseDir, digest.substring(0,4));
562+
baseDir = FileUtil.appendName(baseDir, digest.substring(4,8));
563+
baseDir = FileUtil.appendName(baseDir, digest.substring(8,12));
564+
baseDir = FileUtil.appendName(baseDir, digest.substring(12,20));
565+
baseDir = FileUtil.appendName(baseDir, digest.substring(20,28));
566+
baseDir = FileUtil.appendName(baseDir, digest.substring(28,32));
567+
568+
if (create)
569+
{
570+
baseDir.mkdirs();
571+
}
572+
573+
return baseDir;
566574
}
567575
}

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package org.labkey.api.sequenceanalysis.run;
22

3+
import org.apache.commons.collections4.list.UnmodifiableList;
34
import org.apache.commons.io.FileUtils;
45
import org.apache.commons.lang3.StringUtils;
56
import org.apache.logging.log4j.Logger;
@@ -34,6 +35,7 @@ public class DockerWrapper extends AbstractCommandWrapper
3435
private boolean _useLocalContainerStorage;
3536
private String _alternateUserHome = null;
3637
private final Map<String, String> _dockerEnvironment = new HashMap<>();
38+
private int _maxRetries = 3;
3739

3840
public DockerWrapper(String containerName, Logger log, PipelineContext ctx)
3941
{
@@ -199,7 +201,7 @@ public void executeWithDocker(List<String> containerArgs, File workDir, Pipeline
199201

200202
localBashScript.setExecutable(true);
201203
dockerBashScript.setExecutable(true);
202-
execute(Arrays.asList("/bin/bash", localBashScript.getPath()));
204+
executeWithRetry(Arrays.asList("/bin/bash", localBashScript.getPath()));
203205

204206
if (_useLocalContainerStorage)
205207
{
@@ -214,6 +216,59 @@ public void executeWithDocker(List<String> containerArgs, File workDir, Pipeline
214216
}
215217
}
216218

219+
/**
 * Returns the retry limit consulted by executeWithRetry(). The backing field
 * is initialized to 3.
 *
 * @return the current value of _maxRetries
 */
public int getMaxRetries()
220+
{
221+
return _maxRetries;
222+
}
223+
224+
/**
 * Sets the retry limit consulted by executeWithRetry(). The value is stored
 * as-is; no range validation is performed here.
 *
 * @param maxRetries the new value for _maxRetries
 */
public void setMaxRetries(int maxRetries)
225+
{
226+
_maxRetries = maxRetries;
227+
}
228+
229+
// NOTE: when running on a shared/cluster environment with multiple containers initializing concurrently, conflicts can result in these error codes.
230+
// As a convenience, build in auto-retry behavior if one of these occurs
231+
private final List<Integer> ALLOWABLE_FAIL_CODES = new UnmodifiableList<>(Arrays.asList(125, 127));
232+
233+
private void executeWithRetry(final List<String> args) throws PipelineJobException
234+
{
235+
int retries = 0;
236+
while (retries <= getMaxRetries())
237+
{
238+
try
239+
{
240+
execute(args);
241+
break;
242+
}
243+
catch (PipelineJobException e)
244+
{
245+
if (ALLOWABLE_FAIL_CODES.contains(getLastReturnCode()))
246+
{
247+
retries++;
248+
if (retries > getMaxRetries())
249+
{
250+
getLogger().info("Maximum retries exceeded");
251+
throw e;
252+
}
253+
254+
getLogger().info("Exit code " + getLastReturnCode() + ", retrying after 1 sec (" + retries + " of " + getMaxRetries()+ ")");
255+
try
256+
{
257+
Thread.sleep(1000);
258+
}
259+
catch (InterruptedException ex)
260+
{
261+
throw new PipelineJobException(ex);
262+
}
263+
}
264+
else
265+
{
266+
throw e;
267+
}
268+
}
269+
}
270+
}
271+
217272
private String getEffectiveContainerName()
218273
{
219274
return _containerName;

SequenceAnalysis/resources/queries/sequenceanalysis/alignment_summary_by_lineage.sql

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,7 @@
1-
/*
2-
* Copyright (c) 2012 LabKey Corporation
3-
*
4-
* Licensed under the Apache License, Version 2.0 (the "License");
5-
* you may not use this file except in compliance with the License.
6-
* You may obtain a copy of the License at
7-
*
8-
* http://www.apache.org/licenses/LICENSE-2.0
9-
*
10-
* Unless required by applicable law or agreed to in writing, software
11-
* distributed under the License is distributed on an "AS IS" BASIS,
12-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13-
* See the License for the specific language governing permissions and
14-
* limitations under the License.
15-
*/
1+
PARAMETERS(AnalysisId INTEGER)
2+
163
select
17-
(CAST(a.analysis_id as varchar) || '<>' || a.lineages) as key,
4+
(CAST(AnalysisId as varchar) || '<>' || a.lineages) as key,
185
a.analysis_id,
196
a.lineages,
207
max(a.totalLineages) as totalLineages,
@@ -25,13 +12,13 @@ select
2512
round(100 * (cast(sum(a.total) as float) / cast(max(a.total_reads) as float)), 2) as percent,
2613
group_concat(distinct a.haplotypesWithAllele) as haplotypesWithAllele,
2714

28-
CAST((select sum(s.total) as total FROM sequenceanalysis.alignment_summary s WHERE s.analysis_id = a.analysis_id AND s.rowid IN (
29-
SELECT distinct asj.alignment_id from sequenceanalysis.alignment_summary_junction asj WHERE asj.ref_nt_id.locus = a.loci and asj.status = true
15+
CAST((select sum(s.total) as total FROM sequenceanalysis.alignment_summary s WHERE s.analysis_id = AnalysisId AND s.rowid IN (
16+
SELECT distinct asj.alignment_id from sequenceanalysis.alignment_summary_junction asj WHERE asj.analysis_id = AnalysisId AND asj.ref_nt_id.locus = a.loci and asj.status = true
3017
)
3118
) as integer) as total_reads_from_locus,
3219

33-
round(100 * (cast(sum(a.total) as float) / cast((select sum(s.total) as total FROM sequenceanalysis.alignment_summary s WHERE s.analysis_id = a.analysis_id AND s.rowid IN (
34-
SELECT distinct asj.alignment_id from sequenceanalysis.alignment_summary_junction asj WHERE asj.ref_nt_id.locus = a.loci and asj.status = true
20+
round(100 * (cast(sum(a.total) as float) / cast((select sum(s.total) as total FROM sequenceanalysis.alignment_summary s WHERE s.analysis_id = AnalysisId AND s.rowid IN (
21+
SELECT distinct asj.alignment_id from sequenceanalysis.alignment_summary_junction asj WHERE asj.analysis_id = AnalysisId AND asj.ref_nt_id.locus = a.loci and asj.status = true
3522
)
3623
) as float)), 2) as percent_from_locus,
3724
group_concat(distinct a.rowid, ',') as rowids
@@ -47,15 +34,16 @@ FROM (
4734
group_concat(distinct coalesce(j.ref_nt_id.locus, j.ref_nt_id.name), chr(10)) as loci,
4835

4936
total,
50-
cast((select sum(total) as total FROM sequenceanalysis.alignment_summary s WHERE s.analysis_id = a.analysis_id) as integer) as total_reads,
37+
cast((select sum(total) as total FROM sequenceanalysis.alignment_summary s WHERE s.analysis_id = AnalysisId) as integer) as total_reads,
5138
group_concat(distinct hs.haplotype, chr(10)) as haplotypesWithAllele
5239

5340
from sequenceanalysis.alignment_summary a
54-
join sequenceanalysis.alignment_summary_junction j ON (j.alignment_id = a.rowid and j.status = true)
41+
join sequenceanalysis.alignment_summary_junction j ON (j.analysis_id = AnalysisId AND j.alignment_id = a.rowid and j.status = true)
5542
left join sequenceanalysis.haplotype_sequences hs ON ((
5643
(hs.name = j.ref_nt_id.lineage AND hs.type = 'Lineage') OR
5744
(hs.name = j.ref_nt_id.name AND hs.type = 'Allele')
5845
) AND hs.haplotype.datedisabled IS NULL)
46+
WHERE a.analysis_id = AnalysisId
5947
group by a.analysis_id, a.rowid, a.total
6048

6149
) a

SequenceAnalysis/resources/queries/sequenceanalysis/alignment_summary_by_lineage_pivoted.query.xml

Lines changed: 0 additions & 9 deletions
This file was deleted.

SequenceAnalysis/resources/queries/sequenceanalysis/alignment_summary_by_lineage_pivoted.sql

Lines changed: 0 additions & 9 deletions
This file was deleted.

SequenceAnalysis/resources/queries/sequenceanalysis/alignment_summary_grouped.sql

Lines changed: 9 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,5 @@
1-
/*
2-
* Copyright (c) 2012 LabKey Corporation
3-
*
4-
* Licensed under the Apache License, Version 2.0 (the "License");
5-
* you may not use this file except in compliance with the License.
6-
* You may obtain a copy of the License at
7-
*
8-
* http://www.apache.org/licenses/LICENSE-2.0
9-
*
10-
* Unless required by applicable law or agreed to in writing, software
11-
* distributed under the License is distributed on an "AS IS" BASIS,
12-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13-
* See the License for the specific language governing permissions and
14-
* limitations under the License.
15-
*/
1+
PARAMETERS(AnalysisId INTEGER)
2+
163
select
174
a.analysis_id,
185
a.alleles,
@@ -36,13 +23,13 @@ select
3623
group_concat(a.rowid, ',') as rowids,
3724
group_concat(distinct a.haplotypesWithAllele) as haplotypesWithAllele,
3825

39-
CAST((select sum(s.total) as total FROM sequenceanalysis.alignment_summary s WHERE s.analysis_id = a.analysis_id AND s.rowid IN (
40-
SELECT distinct asj.alignment_id from sequenceanalysis.alignment_summary_junction asj WHERE asj.ref_nt_id.locus = a.loci and asj.status = true
26+
CAST((select sum(s.total) as total FROM sequenceanalysis.alignment_summary s WHERE s.analysis_id = AnalysisId AND s.rowid IN (
27+
SELECT distinct asj.alignment_id from sequenceanalysis.alignment_summary_junction asj WHERE asj.analysis_id = AnalysisId AND asj.ref_nt_id.locus = a.loci and asj.status = true
4128
)
4229
) as INTEGER) as total_reads_from_locus,
4330

44-
round(100 * (cast(sum(a.total) as float) / CASE WHEN count(a.lineages) = 0 THEN max(a.total_reads) ELSE cast((select sum(s.total) as total FROM sequenceanalysis.alignment_summary s WHERE s.analysis_id = a.analysis_id AND s.rowid IN (
45-
SELECT distinct asj.alignment_id from sequenceanalysis.alignment_summary_junction asj WHERE asj.ref_nt_id.locus = a.loci and asj.status = true
31+
round(100 * (cast(sum(a.total) as float) / CASE WHEN count(a.lineages) = 0 THEN max(a.total_reads) ELSE cast((select sum(s.total) as total FROM sequenceanalysis.alignment_summary s WHERE s.analysis_id = AnalysisId AND s.rowid IN (
32+
SELECT distinct asj.alignment_id from sequenceanalysis.alignment_summary_junction asj WHERE asj.analysis_id = AnalysisId AND asj.ref_nt_id.locus = a.loci and asj.status = true
4633
)
4734
) as float) END), 2) as percent_from_locus,
4835
max(lastModified) as lastModified,
@@ -67,14 +54,15 @@ FROM (
6754
total_forward,
6855
total_reverse,
6956
valid_pairs,
70-
(select sum(total) as total FROM sequenceanalysis.alignment_summary s WHERE s.analysis_id = a.analysis_id) as total_reads,
57+
(select sum(total) as total FROM sequenceanalysis.alignment_summary s WHERE s.analysis_id = AnalysisId) as total_reads,
7158
max(j.modified) as lastModified
7259
from sequenceanalysis.alignment_summary a
73-
left join sequenceanalysis.alignment_summary_junction j ON (j.alignment_id = a.rowid and j.status = true)
60+
left join sequenceanalysis.alignment_summary_junction j ON (j.analysis_id = AnalysisId AND j.alignment_id = a.rowid and j.status = true)
7461
left join sequenceanalysis.haplotype_sequences hs ON ((
7562
(hs.name = j.ref_nt_id.lineage AND hs.type = 'Lineage') OR
7663
(hs.name = j.ref_nt_id.name AND hs.type = 'Allele')
7764
) AND hs.haplotype.datedisabled IS NULL)
65+
WHERE a.analysis_id = AnalysisId
7866
group by a.analysis_id, a.rowid, a.total, total_forward, total_reverse, valid_pairs
7967

8068
) a
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
-- This is a reversal of SequenceAnalysis-12.329-12.330.sql:
2+
DROP INDEX IF EXISTS sequenceanalysis.IDX_asj_status_container_alignment_id_ref_nt_id;
3+
DROP INDEX IF EXISTS sequenceanalysis.IDX_haplotypes_name_date;
4+
DROP INDEX IF EXISTS sequenceanalysis.IDX_haplotype_sequences_name_haplotype_type;
5+
DROP INDEX IF EXISTS sequenceanalysis.IDX_alignment_summary_analysis_id_rowid_container_total;
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
SELECT core.executeJavaUpgradeCode('migrateSequenceDirs');
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
-- This is a reversal of SequenceAnalysis-12.329-12.330.sql.
-- IF EXISTS keeps the script from failing when an index is already absent.
DROP INDEX IF EXISTS IDX_asj_status_container_alignment_id_ref_nt_id ON sequenceanalysis.alignment_summary_junction;
DROP INDEX IF EXISTS IDX_haplotypes_name_date ON sequenceanalysis.haplotypes;
DROP INDEX IF EXISTS IDX_haplotype_sequences_name_haplotype_type ON sequenceanalysis.haplotype_sequences;
DROP INDEX IF EXISTS IDX_alignment_summary_analysis_id_rowid_container_total ON sequenceanalysis.alignment_summary;
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
EXEC core.executeJavaUpgradeCode 'migrateSequenceDirs';

0 commit comments

Comments
 (0)