-
Notifications
You must be signed in to change notification settings - Fork 613
Expand file tree
/
Copy pathVoiceActivityDetection.py
More file actions
35 lines (28 loc) · 1.36 KB
/
VoiceActivityDetection.py
File metadata and controls
35 lines (28 loc) · 1.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# Module metadata: the author credit, then the module documentation, which is
# assigned to __doc__ explicitly rather than written as a leading string literal.
__author__ = 'Varun Nayyar'
__doc__ = \
"""
This file is to be modified by users to provide their own Voice Activity Detection (VAD) functions.
I.e. not all frames will have speech present and it is common to remove these frames in many situations
These functions can be used in most base functions by passing VAD = myVADfunction where
myVADfunction follows the template provided.
"""
import numpy as np
def templateVAD(frames, sig):
    """
    Signature template for a user-supplied Voice Activity Detection function.

    This function is a placeholder only: it performs no detection and always raises.
    A real VAD function takes the same two arguments and returns the frames it keeps.

    :param frames: numpy array of [NumFrames][SamplesPerFrame] holding every speech frame
    :param sig: the complete signal the frames were cut from, [sigLen]
    :return: the subset of frames in which voiced activity was detected
    :raises NotImplementedError: always, since this is only a template
    """
    raise NotImplementedError()
def simpleVAD(frames, sig, threshold=0.01):
    """
    Power-threshold voice activity detector applied frame by frame.

    The variance of a frame/signal is used as its average power. A frame is kept
    when its variance is strictly greater than ``threshold`` times the variance of
    the whole signal, so frames that are quiet relative to the signal are dropped.

    :param frames: array-like of [NumFrames][SamplesPerFrame] of all the speech frames
    :param sig: array-like holding the entire signal [sigLen]
    :param threshold: fraction of the whole-signal average power that a frame's
        average power must exceed for the frame to count as active
    :return: numpy array containing the subset of frames (rows, in their original
        order) where activity was detected; it has zero rows when no frame passes
    """
    # Accept plain lists/tuples as well as ndarrays; ndarrays pass through unchanged.
    frames = np.asanyarray(frames)
    # Power a frame has to beat, expressed relative to the power of the full signal.
    powerFloor = np.var(sig) * threshold
    # One variance per frame (axis 1 runs over the samples inside a frame).
    framePower = np.var(frames, axis=1)
    # A boolean mask selects the rows directly; the comparison is strict, so an
    # all-constant signal (zero variance everywhere) yields no active frames.
    return frames[framePower > powerFloor]