-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathspeech-rec.sh
More file actions
131 lines (112 loc) · 3.23 KB
/
speech-rec.sh
File metadata and controls
131 lines (112 loc) · 3.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/bin/bash
#######################################
# Print the command-line usage summary.
# Globals:   none
# Arguments: none (the script name is taken from $0)
# Outputs:   usage text on stdout
#######################################
show_help() {
  # Unquoted delimiter on purpose: ${0##*/} must expand to the script's
  # base name. The text lines stay at column 0 so the output is unchanged.
  cat << EOF
Usage: ${0##*/} [-h] [-i INFILE] [-d DURATION] [-r RATE] [-l LANGUAGE] [-k KEY]
Record an utterance and send audio data to Google for speech recognition.
-h|--help display this help and exit.
-i|--input INFILE use INFILE instead of recording a stream with sox or parecord.
-d|--duration FLOAT recording duration in seconds (Default: 3).
-l|--language STRING set transcription language (Default: en_US).
Other languages: fr_FR, de_DE, es_ES, ...
-r|--rate INTEGER Sampling rate of audio data (Default: 16000, if data is to be recorded).
If -i|--input is used, the sampling rate must be supplied by the user.
-k|--key STRING Google Speech Recognition Key.
EOF
}
# Built-in defaults used when the matching command-line option is not given.
DURATION="3"        # recording length in seconds (-d|--duration)
LANGUAGE="en_US"    # transcription language (-l|--language)
# Fallback API key (-k|--key). Please replace this with your own key.
KEY="AIzaSyAcalCzUvPmmJ7CZBFOEWx2Z1ZSn4Vs1gg"
#######################################
# Record a single-channel audio clip into a file.
# Uses sox's `rec` when it is available; otherwise falls back to `parecord`,
# which is stopped by `timeout` after the requested duration.
# Arguments:
#   $1 - recording duration in seconds
#   $2 - sampling rate
#   $3 - path of the audio file to write
# Returns:   exit status of the recording command (in the fallback this is
#            the status reported by `timeout`)
#######################################
record() {
  # Locals keep the function from clobbering the caller's settings.
  local duration=$1
  local srate=$2
  local outfile=$3

  if command -v rec >/dev/null 2>&1; then
    # try to record audio with sox
    rec -q -c 1 -r "$srate" "$outfile" trim 0 "$duration"
  else
    # fallback to parecord; timeout ends the capture after $duration
    timeout "$duration" parecord "$outfile" --file-format=flac --rate="$srate" --channels=1
  fi
}
#######################################
# Parse command-line options into the script's global settings.
# Globals:   INFILE, DURATION, SRATE, LANGUAGE, KEY (written)
# Arguments: the script's command-line arguments
# Outputs:   usage text on stdout for -h|--help; error messages on stderr
# Exits:     0 after -h|--help; 1 on an unknown option or when an option
#            that needs a value is given without one
#######################################
parse_args() {
  local opt
  while [[ $# -ge 1 ]]; do
    opt="$1"
    case "$opt" in
      -h|--help)
        show_help
        exit 0
        ;;
      -i|--input|-d|--duration|-r|--rate|-l|--language|-k|--key)
        # All of these take a value; without this check a trailing option
        # would silently be stored as an empty string.
        if [[ $# -lt 2 ]]; then
          >&2 echo "ERROR: option '$opt' requires an argument. Type $0 -h for more information."
          exit 1
        fi
        case "$opt" in
          -i|--input)    INFILE="$2" ;;
          -d|--duration) DURATION="$2" ;;
          -r|--rate)     SRATE="$2" ;;
          -l|--language) LANGUAGE="$2" ;;
          -k|--key)      KEY="$2" ;;
        esac
        shift   # drop the value; the option itself is dropped below
        ;;
      *)
        >&2 echo "Unknown parameter '$opt'. Type $0 -h for more information."
        exit 1
        ;;
    esac
    shift
  done
}

# parse parameters
parse_args "$@"
# Validate the settings every run needs before anything is recorded or sent.
# The duration has to be a positive decimal number: the message below already
# promises an "invalid" check, and a zero duration is rejected because GNU
# timeout (used in the parecord fallback) treats 0 as "no timeout".
duration_re='^([0-9]+([.][0-9]*)?|[.][0-9]+)$'
if [[ ! "$DURATION" =~ $duration_re || ! "$DURATION" =~ [1-9] ]]; then
  >&2 echo "ERROR: empty or invalid value for duration."
  exit 1
fi
if [[ -z "$LANGUAGE" ]]; then
  >&2 echo "ERROR: empty value for language."
  exit 1
fi
# Decide where the audio comes from: record a fresh clip when no input file
# was given, otherwise use the supplied file (which needs an explicit rate).
if [[ -z "$INFILE" ]]; then
  INFILE="./records/record_$(date "+%Y%b%d_%H-%M-%S").flac"
  if [[ -z "$SRATE" ]]; then
    SRATE=16000
  fi
else
  if [[ -z "$SRATE" ]]; then
    >&2 echo "ERROR: no sampling rate specified for input file."
    exit 1
  fi
  # Fail early with a clear message instead of letting the upload fail later.
  if [[ ! -f "$INFILE" || ! -r "$INFILE" ]]; then
    >&2 echo "ERROR: cannot read input file $INFILE"
    exit 1
  fi
  from_file=1
fi

# The rate is sent verbatim in the Content-Type header, so it must be a
# plain positive integer.
if [[ ! "$SRATE" =~ ^0*[1-9][0-9]*$ ]]; then
  >&2 echo "ERROR: sampling rate must be a positive integer."
  exit 1
fi

if [[ -z "${from_file:-}" ]]; then
  # Make sure the target directory exists so the recorder can create the file.
  if ! mkdir -p -- "${INFILE%/*}"; then
    >&2 echo "ERROR: cannot create directory ${INFILE%/*}"
    exit 1
  fi
  echo "Say something..."
  echo ""
  record "$DURATION" "$SRATE" "$INFILE"
else
  echo "Try to recognize speech from file $INFILE"
  echo ""
fi
# Upload the audio file to the Google speech API and capture the raw reply.
if ! RESULT=$(wget -q --post-file "$INFILE" \
  --header="Content-Type: audio/x-flac; rate=$SRATE" -O - \
  "https://www.google.com/speech-api/v2/recognize?client=chromium&lang=$LANGUAGE&key=$KEY"); then
  # Report transport failures separately; the empty reply is then handled
  # like any other unrecognized result below.
  >&2 echo "ERROR: request to the Google speech API failed."
fi

# Reduce the reply to "transcript: ..." / "confidence: ..." lines: keep the
# lines holding a transcript, put each field on its own line, strip the
# braces, brackets, commas and quotes, and add a space after each colon.
FILTERED=$(printf '%s\n' "$RESULT" \
  | grep "transcript.*}" \
  | sed 's/,/\n/g;s/[{,},"]//g;s/\[//g;s/\]//g;s/:/: /g' \
  | grep -o -i -e "transcript.*" -e "confidence:.*")

if [[ -z "$FILTERED" ]]; then
  # Leave an empty words.log so stale results are never picked up.
  : > "words.log"
  >&2 echo "Google was unable to recognize any speech in audio data"
else
  echo "Recognition result:"
  echo ""
  echo ""
  # NOTE(review): the transcript is written to words.log only and is not
  # echoed under the heading above - confirm that this is intended.
  printf '%s\n' "$FILTERED" > "words.log"
fi
# NOTE(review): the exit status is 0 even when nothing was recognized.
exit 0