-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathsniffles_var_summary.R
More file actions
60 lines (40 loc) · 1.82 KB
/
sniffles_var_summary.R
File metadata and controls
60 lines (40 loc) · 1.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
library(data.table)
library("optparse")
option_list = list(
make_option(c("-f", "--file"), type="character", default=NULL,
help="sniffles VCF", metavar="character")
# make_option(c("-p", "--prefix"), type="character", default="coverage",
# help="prefix to name the output file", metavar="character")
);
opt_parser = OptionParser(description="Get SV summary table out of sniffles VCF",option_list=option_list);
opt = parse_args(opt_parser);
if (is.null(opt$file)){
print_help(opt_parser)
stop("Please add a file -f <file.txt>", call.=FALSE)
}
command <- paste0("sh /lustre/nobackup/WUR/ABGC/moiti001/TOOLS/scripts/get_sniffles_summary.sh -vcf=", opt$file)
system(command)
# system(paste0("vcf-query -f '%CHROM\t%INFO/SVTYPE\t%POS\t%INFO/END\t%INFO/CHR2\t%INFO/SVLEN\t%INFO/RE\n'" , opt$file, " >> sniffles_summary.txt"))
sniffles_summary <- fread("sniffles_summary.txt")
# Total number of each variant
print("TOTAL NUMBER OF STRUCTURAL VARIANTS")
sniffles_summary[,.N, by=SV]
# Min and max size of variant
print("MIN AND MAX SIZE OF EACH VARIANT")
struct_maxsize <- sniffles_summary[,max(abs(as.numeric(Len))),by=SV]
struct_min <- sniffles_summary[,min(abs(as.numeric(Len))), by=SV]
struct_size_limits <- merge(struct_min, struct_maxsize, by="SV")
colnames(struct_size_limits) <- c("var", "min_size", "max_size")
struct_size_limits
# Number of each variant per chr
print("NUMBER OF EACH VARIANT PER CHR")
sniffles_summary[, .N, by=c("Chr","SV")]
# Number of variants found by chr
print("NUMBER OF VARIANTS FOUND IN EACH CHR")
sniffles_summary[, .N, by=Chr]
# Order by number of variants
print("NUMBER OF VARIANTS FOUND IN EACH CHR ORDERED BY NUM OF VAR")
sniffles_summary[, .N, by=Chr][order(-N)]
# Number of each GT per variant
print("GENOTYPE PER SV")
sniffles_summary[,.N, by=c("SV", "GT")][order(SV)]