-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathget_alignments.bash
More file actions
111 lines (87 loc) · 3.55 KB
/
get_alignments.bash
File metadata and controls
111 lines (87 loc) · 3.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#This program is meant to read in an HMM Profile file and search it against the UniProtKB database.
#It returns an AFA file containing a multiple sequence alignment of each hit. It should be noted
#that the searching algorithm logs a different entry for each domain match. This means that if a
#single protein seqeunce contains two seperate regions matching the HMM Profile, then there will be
#two seperate entries for that protein, each one containing one part of the total seqeunce that
#contains the matching sequence.
#!/bin/bash
#Asks for seed sequence or HMM?
printf "\n\n\nDo you have an HMM Profile? Otherwise it will ask for a file containing seed sequences. (yes/no)\n"
read hmmOrSeed
if [ "$hmmOrSeed" = "yes" ]; then
#Gets name of hmm profile file
printf "Please enter the name of the hmm profile file.\n"
read hmmProfile
if [ -f "$hmmProfile" ]; then
printf ""
else
printf "\nFile $hmmProfile does not exist, closing program.\n"
exit
fi
elif [ "$hmmOrSeed" = "no" ]; then
#Gets name of seed seqeunces file
printf "Please enter the name of the seed seqeunces file.\n"
read seedSeq
if [ -f "$seedSeq" ]; then
#Gets name of hmm profile file
printf "What would you like your HMM Profile file to be called?.\n"
read hmmProfile
else
printf "\nFile $seedSeq does not exist, closing program.\n"
exit
fi
else
printf "\nYou did not enter either "yes" or "no", closing program.\n"
exit
fi
#Gets name of hmm profile file
printf "Do you want to limit the search to just the annotated sequences database? Otherwise it will search the total database. (yes/no)\n"
read whichDatabase
if [ "$whichDatabase" = "yes" ]; then
database="/zfs/smblab/group_software/HMMER/db/uniprot_sprot.fasta.gz"
elif [ "$whichDatabase" = "no" ]; then
database="/zfs/smblab/group_software/HMMER/db/uniprot_trembl.fasta.gz"
elif [ "$whichDatabase" = "pfam" ]; then
database="/zfs/smblab/group_software/HMMER/db/Pfam-A.fasta.gz"
elif [ "$whichDatabase" = "pdz" ]; then
database="/zfs/smblab/group_software/HMMER/db/PDZ12_complete.fasta"
elif [ "$whichDatabase" = "pbpd" ]; then
database="/zfs/smblab/group_software/HMMER/db/PF13407.alignment.full_reformat.fasta.gz"
else
printf "\nYou did not enter either "yes" or "no", closing program.\n"
exit
fi
#Gets maximum number of gaps to be cut out
printf "Please enter the number of maximum number of continuous gaps per sequence.\n"
read maxGaps
reg='^-?[0-9]+([.][0-9]+)?$'
if ! [[ $maxGaps =~ $reg ]] ; then
printf "\nInput was not a positive number. Exiting program.\n"
exit
else
printf ""
fi
#Gets name of output
printf "What would you like your output file to be called?\n"
read outputFileName
#Builds HMM profile
if [ "$hmmOrSeed" = "no" ]; then
printf "\n\nBuilding HMM Profile...\n"
/zfs/smblab/group_software/HMMER/install/bin/hmmbuild $hmmProfile $seedSeq > /dev/null 2>&1
printf "\nFinished building HMM Profile...\n"
else
printf "\n"
fi
printf "\nSearching the databases using hmmsearch...\n"
#runs hmmsearch
/zfs/smblab/group_software/HMMER/install/bin/hmmsearch -A get_alignments_tempfile1 $hmmProfile $database > get_alignments_tempfile2
printf "\nhmmearch completed!\n"
printf "\nConverting output to AFA.\n"
#runs esl-reformat
/zfs/smblab/group_software/HMMER/hmmer-3.3.2/easel/miniapps/esl-reformat -o $outputFileName afa get_alignments_tempfile1
printf "\nConversion complete!\n"
printf "\nFiltering output...\n"
python /home/ghamil4/FRET_Design/DCAscript/filter_pfam_args.py $outputFileName $maxGaps
printf "\nDone filtering output!\n"
#rm get_alignments_tempfile1
#rm get_alignments_tempfile2