forked from BimberLab/DiscvrLabKeyModules
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathSamtoolsCramConverter.java
More file actions
102 lines (81 loc) · 2.86 KB
/
SamtoolsCramConverter.java
File metadata and controls
102 lines (81 loc) · 2.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
package org.labkey.api.sequenceanalysis.pipeline;
import org.apache.logging.log4j.Logger;
import org.jetbrains.annotations.Nullable;
import org.labkey.api.pipeline.PipelineJobException;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
/**
 * Builds and runs samtools command lines that convert SAM/BAM alignments to CRAM
 * and that index an existing alignment file.
 *
 * Created by bimber on 11/4/2016.
 */
public class SamtoolsCramConverter extends SamtoolsRunner
{
    /**
     * @param log logger handed to the parent runner
     */
    public SamtoolsCramConverter(Logger log)
    {
        super(log);
    }

    /**
     * Converts a SAM/BAM file to CRAM (format version 3.0) by running <code>samtools view</code>.
     *
     * @param inputBam     alignment file to convert
     * @param outputCram   destination CRAM file; must not refer to the same file as <code>inputBam</code>
     * @param gzippedFasta reference FASTA passed to samtools through <code>-T</code>
     * @param doIndex      when true, <code>--write-index</code> is added to the command. Unlike
     *                     {@link #doIndex(File, Integer)}, this method does not verify afterwards that an index exists.
     * @param threads      value passed through <code>-@</code>, or null to omit the option
     * @param archivalMode when true, adds <code>lossy_names=1</code> to the output format and the
     *                     <code>archive</code> output format option
     * @return <code>outputCram</code>
     * @throws PipelineJobException if input and output refer to the same file, or if the output file
     *                              does not exist after the command has run
     */
    public File convert(File inputBam, File outputCram, File gzippedFasta, boolean doIndex, @Nullable Integer threads, boolean archivalMode) throws PipelineJobException
    {
        getLogger().info("Converting SAM/BAM to CRAM: " + inputBam.getPath());

        // Writing the output over the input would destroy the data being converted, so refuse up front.
        if (isSameFile(inputBam, outputCram))
        {
            throw new PipelineJobException("Input/output files are the same");
        }

        List<String> params = new ArrayList<>();
        params.add(getSamtoolsPath().getPath());
        params.add("view");

        params.add("--output-fmt");
        params.add("cram,version=3.0" + (archivalMode ? ",lossy_names=1" : ""));

        params.add("-o");
        params.add(outputCram.getPath());

        // CRAM has an optional archive settings mode, requested here as an output format option.
        // It is a lossy compression: read names and additional accessory fields can be removed, and quality
        // scores are compressed further. In all cases the base sequence of the reads is preserved.
        // See: https://www.htslib.org/doc/samtools.html
        if (archivalMode)
        {
            params.add("--output-fmt-option");
            params.add("archive");
        }

        params.add("-T");
        params.add(gzippedFasta.getPath());

        if (doIndex)
        {
            params.add("--write-index");
        }

        if (threads != null)
        {
            params.add("-@");
            params.add(String.valueOf(threads));
        }

        // The input path goes last, after all options.
        params.add(inputBam.getPath());

        execute(params);

        if (!outputCram.exists())
        {
            throw new PipelineJobException("Missing output: " + outputCram.getPath());
        }

        return outputCram;
    }

    /**
     * Runs <code>samtools index</code> on the given file and returns the resulting index.
     *
     * @param input   file to index
     * @param threads value passed through <code>-@</code>, or null to omit the option
     * @return the index file at the location given by {@link #getExpectedCramIndex(File)}
     * @throws PipelineJobException if that index file does not exist after the command has run
     */
    public File doIndex(File input, @Nullable Integer threads) throws PipelineJobException
    {
        List<String> params = new ArrayList<>();
        params.add(getSamtoolsPath().getPath());
        params.add("index");

        if (threads != null)
        {
            params.add("-@");
            params.add(String.valueOf(threads));
        }

        params.add(input.getPath());

        execute(params);

        File idx = getExpectedCramIndex(input);
        if (!idx.exists())
        {
            throw new PipelineJobException("Unable to find CRAM index: " + idx.getPath());
        }

        return idx;
    }

    /**
     * Returns the index location for a CRAM file: the input path with ".crai" appended.
     * The returned file is not checked for existence.
     *
     * @param input the CRAM file
     * @return a file whose path is <code>input.getPath() + ".crai"</code>
     */
    public static File getExpectedCramIndex(File input)
    {
        return new File(input.getPath() + ".crai");
    }

    /**
     * Reports whether two files refer to the same location once their paths are made absolute and normalized.
     * File.equals() alone only compares the pathnames as written, so "a.bam", "./a.bam" and "/cwd/a.bam"
     * would otherwise be treated as three different files. Symbolic links are not resolved.
     */
    private static boolean isSameFile(File first, File second)
    {
        if (first.equals(second))
        {
            return true;
        }

        return first.toPath().toAbsolutePath().normalize().equals(second.toPath().toAbsolutePath().normalize());
    }
}