-
Notifications
You must be signed in to change notification settings - Fork 0
/
sounds.py
119 lines (68 loc) · 2.77 KB
/
sounds.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import librosa
import soundfile
import os, glob, pickle
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
#Extract features (mfcc, chroma, mel) from a sound file
def extract_feature(file_name, mfcc, chroma, mel):
    """Build a 1-D feature vector for a single sound file.

    Every enabled feature family is computed over the whole signal, averaged
    across its frames, and appended to the result in the fixed order
    MFCC, chroma, mel.

    Args:
        file_name: Path of a sound file that ``soundfile`` can open.
        mfcc: If true, append the frame-averaged MFCCs (40 coefficients).
        chroma: If true, append the frame-averaged chromagram computed from
            the magnitude of the signal's STFT.
        mel: If true, append the frame-averaged mel spectrogram.

    Returns:
        A 1-D ``numpy`` array holding the concatenated features; it is empty
        when all three flags are false.
    """
    with soundfile.SoundFile(file_name) as sound_file:
        signal = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    result = np.array([])
    if mfcc:
        mfccs = np.mean(
            librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=40).T, axis=0
        )
        # The MFCCs used to be computed and then dropped; they must be
        # stacked like the other features for mfcc=True to have any effect.
        result = np.hstack((result, mfccs))
    if chroma:
        # The STFT magnitude is only needed by the chroma feature.
        stft = np.abs(librosa.stft(signal))
        chroma_mean = np.mean(
            librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0
        )
        result = np.hstack((result, chroma_mean))
    if mel:
        # Pass the audio as y=...: recent librosa releases accept it only as
        # a keyword argument, and older ones accept the keyword as well.
        mel_mean = np.mean(
            librosa.feature.melspectrogram(y=signal, sr=sample_rate).T, axis=0
        )
        result = np.hstack((result, mel_mean))
    return result
# Emotions in the RAVDESS dataset.
# Maps the two-digit emotion code (the third dash-separated field of a
# RAVDESS file name, as parsed by load_data) to a readable label.
emotions = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    # Was misspelled 'surpried', which could never match 'surprised'.
    '08': 'surprised',
}

# Emotions to observe: only files labelled with one of these are loaded.
observed_emotions = ['calm', 'happy', 'fearful', 'disgust']
#Load the data and extract features for each sound file
def load_data(test_size=0.2, data_glob="C:\\Users\\VC\\Downloads\\speech-emotion-recognition\\speech-emotion-recognition-ravdess-data\\Actor_*\\*.wav"):
    """Load the observed-emotion samples and split them into train/test sets.

    Each file matched by ``data_glob`` is labelled through the module-level
    ``emotions`` table, using the third dash-separated field of its base
    name as the emotion code. Files whose label is not listed in
    ``observed_emotions`` are skipped; for the rest, MFCC, chroma and mel
    features are extracted with ``extract_feature``.

    Args:
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        data_glob: Glob pattern selecting the ``.wav`` files to load. The
            default is the dataset location this script was written for.

    Returns:
        The result of ``train_test_split`` on the feature matrix and the
        label list, i.e. ``x_train, x_test, y_train, y_test``.

    Raises:
        ValueError: If no file with an observed emotion is found.
        KeyError: If a file name carries an emotion code missing from
            ``emotions``.
    """
    x, y = [], []
    # glob order depends on the filesystem; sorting keeps the fixed
    # random_state split reproducible from one machine to the next.
    for wav_path in sorted(glob.glob(data_glob)):
        base_name = os.path.basename(wav_path)
        emotion = emotions[base_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        x.append(extract_feature(wav_path, mfcc=True, chroma=True, mel=True))
        y.append(emotion)

    if not x:
        # Fail with the actual cause instead of train_test_split's generic
        # empty-input error.
        raise ValueError(
            f"No sound files with an observed emotion matched {data_glob!r}"
        )

    # Prints all the features and emotions.
    print(x, y)
    return train_test_split(np.array(x), y, test_size=test_size, random_state=9)
# Load the features and split them, holding out test_size=0.25 for testing.
x_train, x_test, y_train, y_test = load_data(test_size=0.25)

# Report how many samples ended up in the training and test sets.
print((len(x_train), len(x_test)))

# Report the length of each feature vector.
print(f'Features extracted: {x_train.shape[1]}')

# Multilayer perceptron classifier; max_iter caps the number of training
# iterations allowed while waiting for convergence.
model = MLPClassifier(
    hidden_layer_sizes=(300,),
    learning_rate='adaptive',
    alpha=0.01,
    batch_size=256,
    epsilon=1e-08,
    max_iter=1000,
)

# Fit on the training split.
model.fit(x_train, y_train)

# Predict labels for the held-out split.
y_pred = model.predict(x_test)

# Fraction of test samples whose predicted label matches the true one.
accuracy = accuracy_score(y_test, y_pred)

# Print the accuracy as a percentage.
print("Accuracy: {:.2f}%".format(accuracy*100))