-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathextract_features.py
96 lines (86 loc) · 3.16 KB
/
extract_features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
from scipy.io import wavfile
import matplotlib.pyplot as plt
import librosa
import os, tempfile, warnings
import numpy as np
import argparse
import warnings
from multiprocessing import Pool
def add_feature(mfcc1, rmsa1):
    """Return ``mfcc1`` with the rows of ``rmsa1`` appended as extra columns.

    ``mfcc1`` is indexed as (rows, columns) and ``rmsa1`` as
    (new_columns, rows).  The first ``mfcc1.shape[0]`` entries of every
    ``rmsa1`` row are transposed and written to the right of the existing
    columns.

    The result is a freshly allocated array created with ``np.zeros`` of
    shape ``(mfcc1.shape[0], mfcc1.shape[1] + rmsa1.shape[0])``; neither
    input is modified.
    """
    frames, width = mfcc1.shape[0], mfcc1.shape[1]
    extra = rmsa1.shape[0]

    combined = np.zeros((frames, width + extra))
    # Left part: the existing feature columns, unchanged.
    combined[:, :width] = mfcc1[:frames, :width]
    # Right part: the new feature(s), flipped so rows line up with mfcc1.
    combined[:, width:] = rmsa1[:extra, :frames].T
    return combined
def mfcc(audio, nwin=256, nfft=512, fs=16000, nceps=13):
    """Compute MFCCs for ``audio`` with librosa.

    Parameters
    ----------
    audio : signal passed to librosa as ``y``.
    nwin : forwarded as ``win_length``.
    nfft : forwarded as ``n_fft``.
    fs : forwarded as ``sr``.
    nceps : forwarded as ``n_mfcc``.

    Returns
    -------
    A one-element list holding the transpose of librosa's MFCC matrix
    (librosa documents that matrix as (n_mfcc, frames), so the element is
    expected to be (frames, n_mfcc)).
    """
    coefficients = librosa.feature.mfcc(
        y=audio, sr=fs, n_fft=nfft, win_length=nwin, n_mfcc=nceps
    )
    return [np.transpose(coefficients)]
def std_mfcc(mfcc):
    """Standardise ``mfcc`` along axis 0 (zero mean, unit deviation).

    Each column has its mean subtracted and is divided by its standard
    deviation.  A column whose values are all identical has a standard
    deviation of zero; such a column is returned as all zeros instead of
    NaN.

    Parameters
    ----------
    mfcc : array whose columns are standardised independently.

    Returns
    -------
    A new array of the same shape as ``mfcc``.
    """
    centered = mfcc - np.mean(mfcc, axis=0)
    spread = np.std(mfcc, axis=0)
    # Dividing by a zero deviation would turn the whole column into NaN.
    # The centred values of such a column are already 0, so divide by 1.
    safe_spread = np.where(spread == 0, 1, spread)
    return centered / safe_spread
def extract_features_from_file(filename):
    """Read the start of a WAV file and return its feature matrix.

    Only the first 44100 samples of the file are analysed, and the signal
    is handed to ``extract_features`` with ``fs=44100``.  A warning is
    issued when the file reports a different sample rate, because the
    features are still computed as if the rate were 44100.

    Sample values are divided by ``2 ** 15`` (presumably the input is
    expected to be 16-bit PCM -- confirm against the data set).
    Multi-channel files are averaged down to a single channel first.

    Parameters
    ----------
    filename : path of the WAV file to read.

    Returns
    -------
    Whatever ``extract_features`` returns for the loaded signal.
    """
    assumed_rate = 44100
    max_samples = 44100

    rate, samples = wavfile.read(filename, mmap=True)
    if rate != assumed_rate:
        warnings.warn(
            "{}: sample rate is {} Hz but features are computed as if it "
            "were {} Hz".format(filename, rate, assumed_rate)
        )

    # Slice before scaling so only the needed part of the memory-mapped
    # file is read and converted, not the whole recording.
    samples = samples[0:max_samples]
    if samples.ndim > 1:
        # scipy returns multi-channel data as (samples, channels); the
        # feature code works on a 1-D signal, so average the channels.
        samples = samples.mean(axis=1)

    audio = samples / (2.0 ** 15)
    return extract_features(audio, fs=assumed_rate)
def extract_features(buffer, fs=44100):
    """Build a per-frame feature matrix for an audio signal.

    The columns are assembled in this order, each appended with
    ``add_feature``:

    1. 26 MFCCs (``nwin=256``, ``nfft=512``), standardised by ``std_mfcc``
    2. RMS energy
    3. spectral roll-off at ``roll_percent=0.1``, divided by ``fs``
    4. spectral centroid, divided by ``fs``
    5. constant-Q chroma with ``n_chroma=10``
    6. onset strength envelope
    7. predominant local pulse (PLP) computed from that envelope

    Parameters
    ----------
    buffer : audio signal passed to librosa as ``y``.
    fs : sample rate passed to librosa as ``sr``.

    Returns
    -------
    The combined feature array, one row per MFCC frame.
    """
    signal = buffer

    cepstra = mfcc(signal, nwin=256, nfft=512, fs=fs, nceps=26)[0]
    cepstra = std_mfcc(cepstra)

    # Pass the signal by keyword: recent librosa releases make these
    # arguments keyword-only, and every other call here already does so.
    rms = librosa.feature.rms(y=signal)
    centroid = librosa.feature.spectral_centroid(y=signal, sr=fs)
    rolloff = librosa.feature.spectral_rolloff(y=signal, sr=fs, roll_percent=0.1)
    chroma = librosa.feature.chroma_cqt(y=signal, sr=fs, n_chroma=10)
    onset_env = librosa.onset.onset_strength(y=signal, sr=fs)
    pulse = librosa.beat.plp(onset_envelope=onset_env, sr=fs)

    features = add_feature(cepstra, rms)
    # Frequencies are divided by the sample rate before being stored.
    features = add_feature(features, rolloff / fs)
    features = add_feature(features, centroid / fs)
    features = add_feature(features, chroma)
    # The envelope and pulse are 1-D; add_feature expects (features, frames).
    features = add_feature(features, onset_env.reshape(1, -1))
    features = add_feature(features, pulse.reshape(1, -1))
    return features
def uniform_features(features, n=40):
    """Flatten ``features`` into a vector of exactly ``rows * n`` values.

    The array is flattened in row-major order.  If it holds more than
    ``features.shape[0] * n`` values the tail is dropped; if it holds
    fewer, the remainder of the output is left at zero.

    Parameters
    ----------
    features : array to flatten; ``features.shape[0]`` sets the output size.
    n : number of output values per input row.

    Returns
    -------
    A new 1-D array of length ``features.shape[0] * n`` created with
    ``np.zeros``.
    """
    length = features.shape[0] * n
    flat = features.reshape(-1)[:length]

    out = np.zeros(length)
    # Assign only as many values as are available so a short input is
    # zero-padded instead of failing on a shape mismatch.
    out[:flat.size] = flat
    return out
def mean_features(features):
    """Average ``features`` along axis 1.

    For a 2-D input this yields one value per row: the mean of that row's
    columns.
    """
    row_means = np.mean(features, axis=1)
    return row_means
def test_driver():
    """Manual smoke test: extract, print and plot features for one file.

    Uses a fixed sample path.  The final plot reshapes the row means to
    (29, 3), so it only works when the feature matrix has 87 rows.
    """
    sample_path = "amir/amir/3_stack/sounds/1008.wav"

    # Full feature matrix.
    matrix = extract_features_from_file(sample_path)
    for item in (matrix.shape, matrix):
        print(item)
    plt.imshow(matrix)
    plt.show()

    # Fixed-length flattened vector.
    flattened = uniform_features(matrix)
    for item in (flattened.shape, flattened):
        print(item)

    # Per-row means, shown as a small image.
    per_row = mean_features(matrix)
    for item in (per_row.shape, per_row):
        print(item)
    plt.imshow(per_row.reshape((29, 3)))
    plt.show()
def parse_args():
    """Parse the command line and return the argparse namespace.

    The namespace has a single attribute, ``sounds``: a list of one or
    more path strings taken from the positional arguments.
    """
    cli = argparse.ArgumentParser(description='Process some sounds.')
    cli.add_argument(
        'sounds',
        nargs='+',
        type=str,
        metavar='N',
        help='path to Sounds to load',
    )
    return cli.parse_args()
def makeCSV(sound):
    """Print one CSV row for ``sound``: the quoted path, then its features.

    The features are the output of ``mean_features`` applied to
    ``extract_features_from_file(sound)``.  The path is wrapped in double
    quotes, with any embedded double quote doubled so the field stays a
    valid CSV field.

    Processing is best-effort: any exception is reported with
    ``warnings.warn`` (including the file name) and no row is printed, so
    one bad file does not stop the others.

    Parameters
    ----------
    sound : path of the WAV file to process.
    """
    try:
        row = mean_features(extract_features_from_file(sound))
        quoted = '"' + sound.replace('"', '""') + '"'
        # Flush right away: this runs in pool workers, and text left in a
        # worker's output buffer can be lost when the pool is torn down.
        print(",".join([quoted, ",".join([str(x) for x in row])]), flush=True)
    except Exception as e:
        # Name the file so the failure can be traced back to its input.
        warnings.warn("{}: {}".format(sound, e))
if __name__ == "__main__":
    # Command-line entry point: print one CSV row per sound file given on
    # the command line, computing the rows in parallel worker processes.
    args = parse_args()
    sounds = sorted(args.sounds)
    pool = Pool()  # one worker per CPU by default
    try:
        pool.map(makeCSV, sounds)
    finally:
        # Shut the workers down cleanly instead of leaving them to be
        # terminated at interpreter exit, so they can finish writing
        # their output.
        pool.close()
        pool.join()
    # For a quick visual check of a single file, call test_driver() instead.