-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathISEC_VCF_9k+gbs+BOPA_annotation.sh
More file actions
87 lines (70 loc) · 3.54 KB
/
ISEC_VCF_9k+gbs+BOPA_annotation.sh
File metadata and controls
87 lines (70 loc) · 3.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/bin/bash
# Input and Output Directories
INPUTDIR="/Users/jacobpacheco/Desktop/DRUM_Submission"
OUTPUTDIR="/Users/jacobpacheco/Desktop/OUTPUTTED_VCFS"
# Clean and create output directory
rm -rf "$OUTPUTDIR"
mkdir -p "$OUTPUTDIR"
# Compress and index input files if necessary
echo "Ensuring all input files are compressed and indexed..."
bgzip -c "$INPUTDIR/9k_idt90_noRescuedSNPs.vcf" > "$INPUTDIR/9k_idt90_noRescuedSNPs.vcf.gz"
bcftools index -f "$INPUTDIR/wbdc_318_BOPA_morex_v3.vcf.gz"
bcftools index -f "$INPUTDIR/WBDC_GBS_snps_morex_v3.biallelic.mafgt0.0016.filt_miss_het.vcf.gz"
bcftools index -f "$INPUTDIR/9k_idt90_noRescuedSNPs.vcf.gz"
# Perform a three-way intersection
echo -e "\nPerforming three-way intersection..."
bcftools isec -n+3 -p "$OUTPUTDIR/three_way" \
"$INPUTDIR/wbdc_318_BOPA_morex_v3.vcf.gz" \
"$INPUTDIR/WBDC_GBS_snps_morex_v3.biallelic.mafgt0.0016.filt_miss_het.vcf.gz" \
"$INPUTDIR/9k_idt90_noRescuedSNPs.vcf.gz"
# Compress and index the intersection file
echo -e "\nCompressing and indexing the intersection file..."
bgzip -c "$OUTPUTDIR/three_way/0000.vcf" > "$OUTPUTDIR/three_way/0000.vcf.gz"
bcftools index "$OUTPUTDIR/three_way/0000.vcf.gz"
# Annotate variants in the intersection
echo -e "\nAnnotating variants in the intersection..."
bcftools annotate \
-a "$INPUTDIR/wbdc_318_BOPA_morex_v3.vcf.gz" \
-c CHROM,POS,ID \
-o "$OUTPUTDIR/temp_annotated_bopa.vcf" \
"$OUTPUTDIR/three_way/0000.vcf.gz"
bgzip -c "$OUTPUTDIR/temp_annotated_bopa.vcf" > "$OUTPUTDIR/temp_annotated_bopa.vcf.gz"
bcftools index "$OUTPUTDIR/temp_annotated_bopa.vcf.gz"
echo -e "\nAdding 9k IDs to the intersection annotations..."
bcftools annotate \
-a "$INPUTDIR/9k_idt90_noRescuedSNPs.vcf.gz" \
-c CHROM,POS,ID \
-o "$OUTPUTDIR/temp_annotated_final.vcf" \
"$OUTPUTDIR/temp_annotated_bopa.vcf.gz"
bgzip -c "$OUTPUTDIR/temp_annotated_final.vcf" > "$OUTPUTDIR/temp_annotated_final.vcf.gz"
bcftools index "$OUTPUTDIR/temp_annotated_final.vcf.gz"
# Merge intersection annotations back into the original GBS file
echo -e "\nMerging annotations back into the original GBS file..."
bcftools annotate \
-a "$OUTPUTDIR/temp_annotated_final.vcf.gz" \
-c CHROM,POS,ID \
-k \
-o "$OUTPUTDIR/WBDC_GBS_final_annotated.vcf" \
"$INPUTDIR/WBDC_GBS_snps_morex_v3.biallelic.mafgt0.0016.filt_miss_het.vcf.gz"
bgzip -c "$OUTPUTDIR/WBDC_GBS_final_annotated.vcf" > "$OUTPUTDIR/WBDC_GBS_final_annotated.vcf.gz"
bcftools index "$OUTPUTDIR/WBDC_GBS_final_annotated.vcf.gz"
# Final output message
echo -e "\nDone! Final annotated file with all variants retained is: WBDC_GBS_final_annotated.vcf.gz"
# BOPA vs GBS
echo -e "\nComparing BOPA and GBS..."
bcftools isec -n=2 -p "$OUTPUTDIR/bopa_gbs" \
"$INPUTDIR/wbdc_318_BOPA_morex_v3.vcf.gz" \
"$INPUTDIR/WBDC_GBS_snps_morex_v3.biallelic.mafgt0.0016.filt_miss_het.vcf.gz"
echo "Number of shared variants between BOPA and GBS: $(grep -vc '^#' "$OUTPUTDIR/bopa_gbs/0001.vcf")"
# BOPA vs 9k
echo -e "\nComparing BOPA and 9k..."
bcftools isec -n=2 -p "$OUTPUTDIR/bopa_9k" \
"$INPUTDIR/wbdc_318_BOPA_morex_v3.vcf.gz" \
"$INPUTDIR/9k_idt90_noRescuedSNPs.vcf.gz"
echo "Number of shared variants between BOPA and 9k datasets: $(grep -vc '^#' "$OUTPUTDIR/bopa_9k/0001.vcf")"
# GBS vs 9k
echo -e "\nComparing GBS and 9k..."
bcftools isec -n=2 -p "$OUTPUTDIR/gbs_9k" \
"$INPUTDIR/WBDC_GBS_snps_morex_v3.biallelic.mafgt0.0016.filt_miss_het.vcf.gz" \
"$INPUTDIR/9k_idt90_noRescuedSNPs.vcf.gz"
echo "Number of shared variants between GBS and 9k datasets: $(grep -vc '^#' "$OUTPUTDIR/gbs_9k/0001.vcf")"