-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpreprocess_songs.py
More file actions
100 lines (80 loc) · 3.89 KB
/
preprocess_songs.py
File metadata and controls
100 lines (80 loc) · 3.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import os
from random import shuffle
import math
import json
from random import shuffle
import math
from sklearn.model_selection import train_test_split
from dataset_manager import get_all_song_paths_and_labels, get_all_song_paths_and_labels_FMA
import numpy as np
import melspec , utils
import config
def _write_lines(path, lines):
    """Write every item of *lines* to *path*, each followed by a newline."""
    with open(path, "w") as fp:
        fp.writelines(line + "\n" for line in lines)


def split_and_label():
    """Split the full song list into train/validation/test sets and save them.

    Reads genre labels from ``config.ALL_SONGS_LABELS`` and song paths from
    ``config.ALL_SONGS_PATHS`` (one entry per line; the two files are paired
    by line position). The data is split twice with ``train_test_split``:

    1. ``config.TESTING_RATIO`` of all songs is held out as the test set.
    2. ``config.VALIDATION_RATIO`` of the *remaining* songs (not of the whole
       dataset) is held out as the validation set.

    Both splits use ``random_state=1``, so the partition is reproducible.
    The six resulting lists are written, one entry per line, to the
    ``config.{TRAINING,TESTING,VALIDATION}_SONGS_{PATHS,LABELS}`` files.

    Finally the total number of songs, the size of each split and the number
    of songs per genre are printed. Nothing is returned.
    """
    with open(config.ALL_SONGS_LABELS) as f:
        genres = f.read().splitlines()
    with open(config.ALL_SONGS_PATHS) as g:
        songs = g.read().splitlines()

    X_train, X_test, y_train, y_test = train_test_split(
        songs, genres, test_size=config.TESTING_RATIO, random_state=1)
    # The validation set is carved out of what is left after the test split.
    X_train, X_val, y_train, y_val = train_test_split(
        X_train, y_train, test_size=config.VALIDATION_RATIO, random_state=1)

    # Output files are written in the same order as before:
    # training, testing, validation (paths first, then labels).
    outputs = (
        (config.TRAINING_SONGS_PATHS, X_train),
        (config.TRAINING_SONGS_LABELS, y_train),
        (config.TESTING_SONGS_PATHS, X_test),
        (config.TESTING_SONGS_LABELS, y_test),
        (config.VALIDATION_SONGS_PATHS, X_val),
        (config.VALIDATION_SONGS_LABELS, y_val),
    )
    for path, lines in outputs:
        _write_lines(path, lines)

    print(len(genres))
    print(len(X_train))
    print(len(X_test))
    print(len(X_val))

    # Group the song paths by genre to report the per-genre counts.
    allsongdict = {}
    for genre, song in zip(genres, songs):
        allsongdict.setdefault(genre, []).append(song)

    for key in allsongdict:
        print(key, ':', len(allsongdict[key]))
        # NOTE(review): the shuffled list is never used after this point.
        # The call is kept only so the global random state is consumed
        # exactly as before; confirm whether it can be dropped.
        shuffle(allsongdict[key])
def generate_h5_files():
    """Build the mel-spectrogram datasets for the three splits and save them.

    For each of the training, validation and testing splits this loads the
    song paths and labels listed in the corresponding ``config.*_SONGS_PATHS``
    and ``config.*_SONGS_LABELS`` files, converts the labels with
    ``utils.name2num`` against the tags loaded from ``config.GENRES_FILE``,
    extracts mel-spectrograms with ``melspec.extract_melgrams`` and stores
    the result with ``utils.save_h5`` in the matching
    ``config.*_MELSPEC_FILE``. Nothing is returned.
    """
    genre_tags = utils.load(config.GENRES_FILE)

    def _read_split(paths_file, labels_file):
        # Paths are loaded before labels, as in the original sequence.
        # NOTE(review): name2num presumably maps genre names to numeric
        # ids using the tag list -- confirm in utils.
        split_paths = utils.load(paths_file)
        split_labels = utils.name2num(utils.load(labels_file), genre_tags)
        return split_paths, split_labels

    train_paths, train_ids = _read_split(
        config.TRAINING_SONGS_PATHS, config.TRAINING_SONGS_LABELS)
    val_paths, val_ids = _read_split(
        config.VALIDATION_SONGS_PATHS, config.VALIDATION_SONGS_LABELS)
    test_paths, test_ids = _read_split(
        config.TESTING_SONGS_PATHS, config.TESTING_SONGS_LABELS)

    # Extraction runs on the test split first, then training, then
    # validation; that order is kept unchanged.
    test_mel, test_frames = melspec.extract_melgrams(
        test_paths, config.MULTIFRAMES, trim_song=True)
    train_mel, train_frames = melspec.extract_melgrams(
        train_paths, config.MULTIFRAMES, trim_song=True)
    val_mel, val_frames = melspec.extract_melgrams(
        val_paths, config.MULTIFRAMES, trim_song=True)

    # All label lists become arrays before anything is written to disk.
    train_y, val_y, test_y = [
        np.array(ids) for ids in (train_ids, val_ids, test_ids)
    ]

    datasets = (
        (config.TRAINING_MELSPEC_FILE, train_mel, train_y, train_frames),
        (config.VALIDATION_MELSPEC_FILE, val_mel, val_y, val_frames),
        (config.TESTING_MELSPEC_FILE, test_mel, test_y, test_frames),
    )
    for out_file, melgrams, labels, frame_counts in datasets:
        utils.save_h5(out_file, melgrams, labels, frame_counts)
def merge_all_h5s():
    """Merge the training, validation and testing datasets into one file.

    Loads the three datasets with ``utils.load_h5`` from
    ``config.TRAINING_MELSPEC_FILE``, ``config.VALIDATION_MELSPEC_FILE`` and
    ``config.TESTING_MELSPEC_FILE``, concatenates the data, the labels and
    the frame counts along axis 0 (training first, then validation, then
    testing), prints the shapes of the merged data and label arrays, and
    saves the result to ``config.ALL_SONGS_MELSPEC_FILE`` with
    ``utils.save_h5``. Nothing is returned.
    """
    x_train, y_train, num_frames_train = utils.load_h5(config.TRAINING_MELSPEC_FILE)
    x_validate, y_validate, num_frames_validate = utils.load_h5(config.VALIDATION_MELSPEC_FILE)
    x_test, y_test, num_frames_test = utils.load_h5(config.TESTING_MELSPEC_FILE)

    # One concatenate call per array replaces the nested pairwise calls.
    x_all = np.concatenate((x_train, x_validate, x_test), axis=0)
    y_all = np.concatenate((y_train, y_validate, y_test), axis=0)
    num_frames_all = np.concatenate(
        (num_frames_train, num_frames_validate, num_frames_test), axis=0)

    # print() with a single argument behaves the same on Python 2 and 3;
    # the former `print x` statement is a SyntaxError on Python 3.
    print(x_all.shape)
    print(y_all.shape)

    utils.save_h5(config.ALL_SONGS_MELSPEC_FILE, x_all, y_all, num_frames_all)
def _main():
    """Run the preprocessing steps in order when executed as a script.

    Calls ``get_all_song_paths_and_labels`` on ``config.SONG_FLODER`` (the
    attribute name is reproduced exactly as the script references it), then
    ``split_and_label`` and finally ``generate_h5_files``.
    """
    # NOTE(review): get_all_song_paths_and_labels presumably produces the
    # ALL_SONGS path/label files that split_and_label reads -- confirm in
    # dataset_manager.
    get_all_song_paths_and_labels(config.SONG_FLODER)
    split_and_label()
    generate_h5_files()


if __name__ == "__main__":
    _main()