-
Notifications
You must be signed in to change notification settings - Fork 53
Expand file tree
/
Copy pathfeat_extract.py
More file actions
96 lines (82 loc) · 3.28 KB
/
feat_extract.py
File metadata and controls
96 lines (82 loc) · 3.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/env python
# coding= UTF-8
#
# Author: Fing
# Date : 2017-12-03
#
import code
import glob
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
import soundfile as sf
import sounddevice as sd
import queue
def extract_feature(file_name=None):
    """Compute time-averaged spectral features for one audio clip.

    Args:
        file_name: Path of an audio file, read with ``soundfile`` as
            float32. When falsy, audio is recorded from the default input
            device until at least 100000 frames have been collected.

    Returns:
        Tuple ``(mfccs, chroma, mel, contrast, tonnetz)``. Each item is a
        1-D array: the corresponding librosa feature matrix averaged over
        its second axis (time frames, per librosa's layout).
    """
    if file_name:
        print('Extracting', file_name)
        X, sample_rate = sf.read(file_name, dtype='float32')
    else:
        device_info = sd.query_devices(None, 'input')
        sample_rate = int(device_info['default_samplerate'])
        q = queue.Queue()

        def callback(indata, frames, time, status):
            # Queue a copy of the block rather than the array handed to
            # the callback.
            q.put(indata.copy())

        min_frames = 100000
        data = []
        with sd.InputStream(samplerate=sample_rate, callback=callback):
            # Each queue item is one block of frames, so the final length
            # may overshoot min_frames by part of a block.
            while len(data) < min_frames:
                data.extend(q.get())
        X = np.array(data)

    # Keep only the first channel of multi-channel audio.
    if X.ndim > 1:
        X = X[:, 0]

    # short term fourier transform (magnitude only)
    stft = np.abs(librosa.stft(X))
    # mfcc (mel-frequency cepstrum)
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    # chroma
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    # melspectrogram -- the signal must be passed as ``y=``: librosa >= 0.10
    # rejects positional arguments here, and the keyword form also works on
    # older releases.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    # spectral contrast
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    # tonal centroid features, computed on the harmonic component
    harmonic = librosa.effects.harmonic(y=X)
    tonnetz = np.mean(librosa.feature.tonnetz(y=harmonic, sr=sample_rate).T, axis=0)
    return mfccs, chroma, mel, contrast, tonnetz
def parse_audio_files(parent_dir,file_ext='*.ogg'):
    """Extract features for every labelled clip under ``parent_dir``.

    Each sub-directory of ``parent_dir`` is treated as one class. A clip's
    label is the position of its sub-directory in the sorted listing of
    ``parent_dir``; entries that are not directories still occupy a
    position, so labels are not necessarily contiguous.

    Args:
        parent_dir: Directory whose sub-directories hold the audio files.
        file_ext: Glob pattern selecting the files inside each sub-directory.

    Returns:
        ``(features, labels)``: a float array with 193 columns and one row
        per successfully processed file, and an integer array of the
        matching labels. Files whose feature extraction raises are reported
        on stdout and skipped.

    Raises:
        ValueError: If an extracted feature row is not 193 values wide
            (raised when the rows are stacked at the end).
    """
    rows, labels = [], []
    for label, sub_dir in enumerate(sorted(os.listdir(parent_dir))):
        class_dir = os.path.join(parent_dir, sub_dir)
        if not os.path.isdir(class_dir):
            continue
        for fn in glob.glob(os.path.join(class_dir, file_ext)):
            try:
                mfccs, chroma, mel, contrast, tonnetz = extract_feature(fn)
            except Exception as e:
                # Best effort: one unreadable clip must not abort the run.
                print("[Error] extract feature error in %s. %s" % (fn,e))
                continue
            rows.append(np.hstack([mfccs, chroma, mel, contrast, tonnetz]))
            labels.append(label)
        print("extract %s features done" % (sub_dir))
    # Stack once at the end instead of re-copying the matrix per file; the
    # empty (0, 193) seed keeps the shape for empty input and enforces the
    # row width.
    features = np.vstack([np.empty((0, 193))] + rows)
    # Builtin int: the np.int alias was removed in NumPy 1.24.
    return features, np.array(labels, dtype=int)
def parse_predict_files(parent_dir,file_ext='*.ogg'):
    """Extract features for the unlabelled clips directly in ``parent_dir``.

    Args:
        parent_dir: Directory containing the audio files to process.
        file_ext: Glob pattern selecting files inside ``parent_dir``.

    Returns:
        ``(features, filenames)``: an array with 193 columns and one row per
        matched file, and an array of the matched paths in the same order.
        Any exception raised while extracting a file propagates to the
        caller.
    """
    pattern = os.path.join(parent_dir, file_ext)
    feature_matrix = np.empty((0, 193))
    matched_paths = []
    for path in glob.glob(pattern):
        mfccs, chroma, mel, contrast, tonnetz = extract_feature(path)
        row = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
        # Append this clip's row beneath the rows gathered so far.
        feature_matrix = np.vstack([feature_matrix, row])
        matched_paths.append(path)
        print("extract %s features done" % path)
    return np.array(feature_matrix), np.array(matched_paths)
def main():
    """Extract training and prediction features and save them as .npy files.

    Reads labelled clips from ``data`` and unlabelled clips from ``predict``
    (both relative to the working directory) and writes four arrays into
    the working directory.
    """
    # Labelled training data
    train_features, train_labels = parse_audio_files('data')
    np.save('feat.npy', train_features)
    np.save('label.npy', train_labels)

    # Clips to run predictions on
    predict_features, predict_names = parse_predict_files('predict')
    np.save('predict_feat.npy', predict_features)
    np.save('predict_filenames.npy', predict_names)


if __name__ == '__main__':
    main()