-
Notifications
You must be signed in to change notification settings - Fork 15
Trying to work on functionality for the display and other updates #15
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7ace4f2
604a85b
aef8ba1
0a7d7bc
8b8e446
a481640
167b941
78bdf80
8780942
1314012
6dfce05
d0c297f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -26,7 +26,7 @@ function hz_to_mel(frequencies::Union{F, AbstractArray{F}}) where {F <: Real} | |||||||||
f_min = 0f0 | ||||||||||
f_sp = 200f0 / 3 | ||||||||||
|
||||||||||
mels = collect((frequencies - f_min) / f_sp) | ||||||||||
mels = collect((frequencies .- f_min) ./ f_sp) | ||||||||||
|
||||||||||
min_log_hz = 1000f0 | ||||||||||
min_log_mel = (min_log_hz - f_min) / f_sp | ||||||||||
|
@@ -45,7 +45,7 @@ end | |||||||||
function mel_to_hz(mels::Union{F, AbstractArray{F}}) where {F <: Real} | ||||||||||
f_min = 0f0 | ||||||||||
f_sp = 200f0 / 3 | ||||||||||
frequencies = collect(f_min + f_sp * mels) | ||||||||||
frequencies = collect(f_min .+ f_sp .* mels) | ||||||||||
|
||||||||||
min_log_hz = 1000f0 | ||||||||||
min_log_mel = (min_log_hz - f_min) / f_sp | ||||||||||
|
@@ -65,39 +65,43 @@ function mel_frequencies(nmels::Int = 128, fmin::Real = 0.0f0, fmax::Real = 1102 | |||||||||
min_mel = hz_to_mel(fmin)[1] | ||||||||||
max_mel = hz_to_mel(fmax)[1] | ||||||||||
|
||||||||||
mels = linspace(min_mel, max_mel, nmels) | ||||||||||
mels = range(min_mel, max_mel, length=nmels) | ||||||||||
mel_to_hz(mels) | ||||||||||
end | ||||||||||
|
||||||||||
"""""" | ||||||||||
function mel(samplerate::Real, nfft::Int, nmels::Int = 128, fmin::Real = 0f0, fmax::Real = samplerate/2f0) | ||||||||||
weights = zeros(Float32, nmels, (nfft >> 1) + 1) | ||||||||||
second_dim = ((nfft >> 1) + 1) | ||||||||||
weights = zeros(Float32, nmels, second_dim) | ||||||||||
fftfreqs = fft_frequencies(samplerate, nfft) | ||||||||||
melfreqs = mel_frequencies(nmels + 2, fmin, fmax) | ||||||||||
enorm = 2f0 ./ (melfreqs[3:end] - melfreqs[1:nmels]) | ||||||||||
enorm = 2f0 ./ (melfreqs[3:end] .- melfreqs[1:nmels]) | ||||||||||
|
||||||||||
for i in 1:nmels | ||||||||||
lower = (fftfreqs - melfreqs[i]) / (melfreqs[i+1] - melfreqs[i]) | ||||||||||
upper = (melfreqs[i+2] - fftfreqs) / (melfreqs[i+2] - melfreqs[i+1]) | ||||||||||
lower = (fftfreqs .- melfreqs[i]) ./ (melfreqs[i+1] .- melfreqs[i]) | ||||||||||
upper = (melfreqs[i+2] .- fftfreqs) ./ (melfreqs[i+2] .- melfreqs[i+1]) | ||||||||||
Comment on lines
+81
to
+82
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
This is scalar minus, I think. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It doesn't make any difference in the actual code, though. |
||||||||||
|
||||||||||
for j in 1:second_dim | ||||||||||
weights[i, j] = max(0, min(lower[j], upper[j])) * enorm[i] | ||||||||||
end | ||||||||||
Comment on lines
+84
to
+86
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The for-loop version is guaranteed to be faster, although I have no idea whether this is performance-critical code. |
||||||||||
|
||||||||||
weights[i, :] = max(0, min(lower, upper)) * enorm[i] | ||||||||||
end | ||||||||||
|
||||||||||
weights | ||||||||||
end | ||||||||||
|
||||||||||
function melspectrogram(audio::SampleBuf{T, 1}, windowsize::Int = 1024, hopsize::Int = windowsize >> 2; | ||||||||||
nmels::Int = 128, fmin::Real = 0f0, fmax::Real = audio.samplerate.val / 2f0) where T | ||||||||||
samplerate = audio.samplerate.val | ||||||||||
nmels::Int = 128, fmin::Real = 0f0, fmax::Real = audio.samplerate / 2f0) where T | ||||||||||
samplerate = audio.samplerate | ||||||||||
nfft = DSP.nextfastfft(windowsize) | ||||||||||
S = spectrogram(audio, windowsize, hopsize).power | ||||||||||
data = mel(samplerate, nfft, nmels, fmin, fmax) * S | ||||||||||
nframes = size(data, 2) | ||||||||||
MelSpectrogram(data, linspace(hz_to_mel(fmin)[1], hz_to_mel(fmax)[1], nmels), (0.0:nframes-1) * hopsize / samplerate) | ||||||||||
MelSpectrogram(data, range(hz_to_mel(fmin)[1], hz_to_mel(fmax)[1], length=nmels), (0.0:nframes-1) * hopsize / samplerate) | ||||||||||
end | ||||||||||
|
||||||||||
function mfcc(audio::SampleBuf{T, 1}, windowsize::Int = 1024, hopsize::Int = windowsize >> 2; | ||||||||||
nmfcc::Int = 20, nmels::Int = 128, fmin::Real = 0f0, fmax::Real = audio.samplerate.val / 2f0) where T | ||||||||||
nmfcc::Int = 20, nmels::Int = 128, fmin::Real = 0f0, fmax::Real = audio.samplerate / 2f0) where T | ||||||||||
if nmfcc >= nmels | ||||||||||
error("number of mfcc components should be less than the number of mel frequency bins") | ||||||||||
end | ||||||||||
|
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -3,7 +3,7 @@ | |||||||||
tofloat(array::AbstractArray{T}) where T = map(Float32, array) | ||||||||||
tofloat(array::AbstractArray{Float32}) = array | ||||||||||
|
||||||||||
fft_frequencies(samplerate::Real, nfft::Int) = collect(linspace(0f0, samplerate / 2f0, (nfft >> 1) + 1)) | ||||||||||
fft_frequencies(samplerate::Real, nfft::Int) = collect(range(0f0, samplerate / 2f0, length=(nfft >> 1) + 1)) | ||||||||||
|
||||||||||
|
||||||||||
"""returns the number of frames when the signal is partitioned into overlapping frames""" | ||||||||||
|
@@ -42,10 +42,12 @@ end | |||||||||
|
||||||||||
"""returns the DCT filters""" | ||||||||||
function dct(nfilters::Int, ninput::Int) | ||||||||||
basis = Array(Float32, nfilters, ninput) | ||||||||||
basis = Array{Float32}(undef, nfilters, ninput) | ||||||||||
samples = (1f0:2f0:2ninput) * π / 2ninput | ||||||||||
for i = 1:nfilters | ||||||||||
basis[i, :] = cos(i * samples) | ||||||||||
for j in 1:length(samples) | ||||||||||
basis[i, j] = cos(i * samples[j]) | ||||||||||
end | ||||||||||
Comment on lines
+48
to
+50
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is a = [1,2,3]
a[:] = [4,5,6]
println(a) # [4,5,6] There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it is the preferred way to do it, as it allows loop fusion (here it doesn't apply, but better to follow the best style) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in addition your example allocates one extra vector for not using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for explanation! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nevertheless, the for loop version is guaranteed to be faster. |
||||||||||
end | ||||||||||
|
||||||||||
basis *= sqrt(2f0/ninput) | ||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I cannot see why to use `tfr.power` instead of `power(tfr)`; I think the latter (original) is more flexible.