Skip to content

Trying to work on functionality for the display and other updates #15

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 12 commits into from
1 change: 1 addition & 0 deletions src/MusicProcessing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module MusicProcessing
using DSP
using FFTW
using FixedPointNumbers
using LinearAlgebra
using Requires
using Unitful
using IntervalSets
Expand Down
2 changes: 1 addition & 1 deletion src/TFR.jl
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ function phase_vocoder(stft::Array{Complex{T}, 2},

# store to output array
cis!(cis_phase, phase_acc)
multiply!(cis_phase, cis_phase, mag)
cis_phase .= cis_phase .* mag
for i in 1:nbins
stretched[i, t] = cis_phase[i]
end
Expand Down
7 changes: 4 additions & 3 deletions src/display.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import Base.show

# methods to translate

Expand All @@ -16,11 +17,11 @@ function yticklabels(tfr::MFCC, yticks::Array)
map(Int, map(round, yticks)), "MFCC number"
end

heatmap(tfr::DSP.Periodograms.TFR) = log10(power(tfr))
heatmap(tfr::DSP.Periodograms.TFR) = map(x -> log(10,x), tfr.power)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
heatmap(tfr::DSP.Periodograms.TFR) = map(x -> log(10,x), tfr.power)
heatmap(tfr::DSP.Periodograms.TFR) = log10.(power(tfr))

I cannot see why `tfr.power` is used instead of `power(tfr)`; I think the latter (the original) is more flexible.


function draw_heatmap(tfr::DSP.Periodograms.TFR)
X = time(tfr)
Y = freq(tfr)
X = tfr.time
Y = tfr.freq
Z = heatmap(tfr)

PyPlot.pcolormesh(X, Y, Z)
Expand Down
28 changes: 16 additions & 12 deletions src/mel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ function hz_to_mel(frequencies::Union{F, AbstractArray{F}}) where {F <: Real}
f_min = 0f0
f_sp = 200f0 / 3

mels = collect((frequencies - f_min) / f_sp)
mels = collect((frequencies .- f_min) ./ f_sp)

min_log_hz = 1000f0
min_log_mel = (min_log_hz - f_min) / f_sp
Expand All @@ -45,7 +45,7 @@ end
function mel_to_hz(mels::Union{F, AbstractArray{F}}) where {F <: Real}
f_min = 0f0
f_sp = 200f0 / 3
frequencies = collect(f_min + f_sp * mels)
frequencies = collect(f_min .+ f_sp .* mels)

min_log_hz = 1000f0
min_log_mel = (min_log_hz - f_min) / f_sp
Expand All @@ -65,39 +65,43 @@ function mel_frequencies(nmels::Int = 128, fmin::Real = 0.0f0, fmax::Real = 1102
min_mel = hz_to_mel(fmin)[1]
max_mel = hz_to_mel(fmax)[1]

mels = linspace(min_mel, max_mel, nmels)
mels = range(min_mel, max_mel, length=nmels)
mel_to_hz(mels)
end

""""""
function mel(samplerate::Real, nfft::Int, nmels::Int = 128, fmin::Real = 0f0, fmax::Real = samplerate/2f0)
weights = zeros(Float32, nmels, (nfft >> 1) + 1)
second_dim = ((nfft >> 1) + 1)
weights = zeros(Float32, nmels, second_dim)
fftfreqs = fft_frequencies(samplerate, nfft)
melfreqs = mel_frequencies(nmels + 2, fmin, fmax)
enorm = 2f0 ./ (melfreqs[3:end] - melfreqs[1:nmels])
enorm = 2f0 ./ (melfreqs[3:end] .- melfreqs[1:nmels])

for i in 1:nmels
lower = (fftfreqs - melfreqs[i]) / (melfreqs[i+1] - melfreqs[i])
upper = (melfreqs[i+2] - fftfreqs) / (melfreqs[i+2] - melfreqs[i+1])
lower = (fftfreqs .- melfreqs[i]) ./ (melfreqs[i+1] .- melfreqs[i])
upper = (melfreqs[i+2] .- fftfreqs) ./ (melfreqs[i+2] .- melfreqs[i+1])
Comment on lines +81 to +82

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
lower = (fftfreqs .- melfreqs[i]) ./ (melfreqs[i+1] .- melfreqs[i])
upper = (melfreqs[i+2] .- fftfreqs) ./ (melfreqs[i+2] .- melfreqs[i+1])
lower = (fftfreqs .- melfreqs[i]) ./ (melfreqs[i+1] - melfreqs[i])
upper = (melfreqs[i+2] .- fftfreqs) ./ (melfreqs[i+2] - melfreqs[i+1])

I think this is a scalar subtraction, so the broadcasting dot is not needed in the denominator.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't make any difference in the actual code, though.


for j in 1:second_dim
weights[i, j] = max(0, min(lower[j], upper[j])) * enorm[i]
end
Comment on lines +84 to +86
Copy link

@clouds56 clouds56 Jun 11, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
for j in 1:second_dim
weights[i, j] = max(0, min(lower[j], upper[j])) * enorm[i]
end
weights[i, :] .= max.(0, min.(lower, upper)) .* enorm[i]

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The for-loop version is guaranteed to be faster, although I have no idea whether this is performance-critical code.


weights[i, :] = max(0, min(lower, upper)) * enorm[i]
end

weights
end

function melspectrogram(audio::SampleBuf{T, 1}, windowsize::Int = 1024, hopsize::Int = windowsize >> 2;
nmels::Int = 128, fmin::Real = 0f0, fmax::Real = audio.samplerate.val / 2f0) where T
samplerate = audio.samplerate.val
nmels::Int = 128, fmin::Real = 0f0, fmax::Real = audio.samplerate / 2f0) where T
samplerate = audio.samplerate
nfft = DSP.nextfastfft(windowsize)
S = spectrogram(audio, windowsize, hopsize).power
data = mel(samplerate, nfft, nmels, fmin, fmax) * S
nframes = size(data, 2)
MelSpectrogram(data, linspace(hz_to_mel(fmin)[1], hz_to_mel(fmax)[1], nmels), (0.0:nframes-1) * hopsize / samplerate)
MelSpectrogram(data, range(hz_to_mel(fmin)[1], hz_to_mel(fmax)[1], length=nmels), (0.0:nframes-1) * hopsize / samplerate)
end

function mfcc(audio::SampleBuf{T, 1}, windowsize::Int = 1024, hopsize::Int = windowsize >> 2;
nmfcc::Int = 20, nmels::Int = 128, fmin::Real = 0f0, fmax::Real = audio.samplerate.val / 2f0) where T
nmfcc::Int = 20, nmels::Int = 128, fmin::Real = 0f0, fmax::Real = audio.samplerate / 2f0) where T
if nmfcc >= nmels
error("number of mfcc components should be less than the number of mel frequency bins")
end
Expand Down
8 changes: 5 additions & 3 deletions src/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
tofloat(array::AbstractArray{T}) where T = map(Float32, array)
tofloat(array::AbstractArray{Float32}) = array

fft_frequencies(samplerate::Real, nfft::Int) = collect(linspace(0f0, samplerate / 2f0, (nfft >> 1) + 1))
fft_frequencies(samplerate::Real, nfft::Int) = collect(range(0f0, samplerate / 2f0, length=(nfft >> 1) + 1))


"""returns the number of frames when the signal is partitioned into overlapping frames"""
Expand Down Expand Up @@ -42,10 +42,12 @@ end

"""returns the DCT filters"""
function dct(nfilters::Int, ninput::Int)
basis = Array(Float32, nfilters, ninput)
basis = Array{Float32}(undef, nfilters, ninput)
samples = (1f0:2f0:2ninput) * π / 2ninput
for i = 1:nfilters
basis[i, :] = cos(i * samples)
for j in 1:length(samples)
basis[i, j] = cos(i * samples[j])
end
Comment on lines +48 to +50
Copy link

@clouds56 clouds56 Jun 11, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
for j in 1:length(samples)
basis[i, j] = cos(i * samples[j])
end
basis[i, :] .= cos.(i .* samples)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use `.=` instead.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is `.=` necessary? I've tried this in the REPL:

a = [1,2,3]
a[:] = [4,5,6]
println(a) # [4,5,6]

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is the preferred way to do it, as it allows loop fusion (that doesn't apply here, but it is better to follow the best style).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In addition, your example allocates one extra vector because it does not use `.=`.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the explanation!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nevertheless, the for-loop version is guaranteed to be faster.

end

basis *= sqrt(2f0/ninput)
Expand Down