diff --git a/src/MusicProcessing.jl b/src/MusicProcessing.jl index fe91bb4..87eb2ba 100644 --- a/src/MusicProcessing.jl +++ b/src/MusicProcessing.jl @@ -3,6 +3,7 @@ module MusicProcessing using DSP using FFTW using FixedPointNumbers +using LinearAlgebra using Requires using Unitful using IntervalSets diff --git a/src/TFR.jl b/src/TFR.jl index 0ebaac5..6da7138 100644 --- a/src/TFR.jl +++ b/src/TFR.jl @@ -239,7 +239,7 @@ function phase_vocoder(stft::Array{Complex{T}, 2}, # store to output array cis!(cis_phase, phase_acc) - multiply!(cis_phase, cis_phase, mag) + cis_phase .= cis_phase .* mag for i in 1:nbins stretched[i, t] = cis_phase[i] end diff --git a/src/display.jl b/src/display.jl index 049fbdd..872bc2d 100644 --- a/src/display.jl +++ b/src/display.jl @@ -1,3 +1,4 @@ +import Base.show # methods to translate @@ -16,11 +17,11 @@ function yticklabels(tfr::MFCC, yticks::Array) map(Int, map(round, yticks)), "MFCC number" end -heatmap(tfr::DSP.Periodograms.TFR) = log10(power(tfr)) +heatmap(tfr::DSP.Periodograms.TFR) = map(x -> log(10,x), tfr.power) function draw_heatmap(tfr::DSP.Periodograms.TFR) - X = time(tfr) - Y = freq(tfr) + X = tfr.time + Y = tfr.freq Z = heatmap(tfr) PyPlot.pcolormesh(X, Y, Z) diff --git a/src/mel.jl b/src/mel.jl index 196e146..0c06bc7 100644 --- a/src/mel.jl +++ b/src/mel.jl @@ -26,7 +26,7 @@ function hz_to_mel(frequencies::Union{F, AbstractArray{F}}) where {F <: Real} f_min = 0f0 f_sp = 200f0 / 3 - mels = collect((frequencies - f_min) / f_sp) + mels = collect((frequencies .- f_min) ./ f_sp) min_log_hz = 1000f0 min_log_mel = (min_log_hz - f_min) / f_sp @@ -45,7 +45,7 @@ end function mel_to_hz(mels::Union{F, AbstractArray{F}}) where {F <: Real} f_min = 0f0 f_sp = 200f0 / 3 - frequencies = collect(f_min + f_sp * mels) + frequencies = collect(f_min .+ f_sp .* mels) min_log_hz = 1000f0 min_log_mel = (min_log_hz - f_min) / f_sp @@ -65,39 +65,43 @@ function mel_frequencies(nmels::Int = 128, fmin::Real = 0.0f0, fmax::Real = 1102 min_mel = hz_to_mel(fmin)[1] max_mel = hz_to_mel(fmax)[1] - mels = linspace(min_mel, max_mel, nmels) + mels = range(min_mel, max_mel, length=nmels) mel_to_hz(mels) end """""" function mel(samplerate::Real, nfft::Int, nmels::Int = 128, fmin::Real = 0f0, fmax::Real = samplerate/2f0) - weights = zeros(Float32, nmels, (nfft >> 1) + 1) + second_dim = ((nfft >> 1) + 1) + weights = zeros(Float32, nmels, second_dim) fftfreqs = fft_frequencies(samplerate, nfft) melfreqs = mel_frequencies(nmels + 2, fmin, fmax) - enorm = 2f0 ./ (melfreqs[3:end] - melfreqs[1:nmels]) + enorm = 2f0 ./ (melfreqs[3:end] .- melfreqs[1:nmels]) for i in 1:nmels - lower = (fftfreqs - melfreqs[i]) / (melfreqs[i+1] - melfreqs[i]) - upper = (melfreqs[i+2] - fftfreqs) / (melfreqs[i+2] - melfreqs[i+1]) + lower = (fftfreqs .- melfreqs[i]) ./ (melfreqs[i+1] .- melfreqs[i]) + upper = (melfreqs[i+2] .- fftfreqs) ./ (melfreqs[i+2] .- melfreqs[i+1]) + + for j in 1:second_dim + weights[i, j] = max(0, min(lower[j], upper[j])) * enorm[i] + end - weights[i, :] = max(0, min(lower, upper)) * enorm[i] end weights end function melspectrogram(audio::SampleBuf{T, 1}, windowsize::Int = 1024, hopsize::Int = windowsize >> 2; - nmels::Int = 128, fmin::Real = 0f0, fmax::Real = audio.samplerate.val / 2f0) where T - samplerate = audio.samplerate.val + nmels::Int = 128, fmin::Real = 0f0, fmax::Real = audio.samplerate / 2f0) where T + samplerate = audio.samplerate nfft = DSP.nextfastfft(windowsize) S = spectrogram(audio, windowsize, hopsize).power data = mel(samplerate, nfft, nmels, fmin, fmax) * S nframes = size(data, 2) - MelSpectrogram(data, linspace(hz_to_mel(fmin)[1], hz_to_mel(fmax)[1], nmels), (0.0:nframes-1) * hopsize / samplerate) + MelSpectrogram(data, range(hz_to_mel(fmin)[1], hz_to_mel(fmax)[1], length=nmels), (0.0:nframes-1) * hopsize / samplerate) end function mfcc(audio::SampleBuf{T, 1}, windowsize::Int = 1024, hopsize::Int = windowsize >> 2; - nmfcc::Int = 20, nmels::Int = 128, fmin::Real = 0f0, fmax::Real = audio.samplerate.val / 2f0) where T + nmfcc::Int = 20, nmels::Int = 128, fmin::Real = 0f0, fmax::Real = audio.samplerate / 2f0) where T if nmfcc >= nmels error("number of mfcc components should be less than the number of mel frequency bins") end diff --git a/src/util.jl b/src/util.jl index 2d2cecd..9f8f324 100644 --- a/src/util.jl +++ b/src/util.jl @@ -3,7 +3,7 @@ tofloat(array::AbstractArray{T}) where T = map(Float32, array) tofloat(array::AbstractArray{Float32}) = array -fft_frequencies(samplerate::Real, nfft::Int) = collect(linspace(0f0, samplerate / 2f0, (nfft >> 1) + 1)) +fft_frequencies(samplerate::Real, nfft::Int) = collect(range(0f0, samplerate / 2f0, length=(nfft >> 1) + 1)) """returns the number of frames when the signal is partitioned into overlapping frames""" @@ -42,10 +42,12 @@ end """returns the DCT filters""" function dct(nfilters::Int, ninput::Int) - basis = Array(Float32, nfilters, ninput) + basis = Array{Float32}(undef, nfilters, ninput) samples = (1f0:2f0:2ninput) * π / 2ninput for i = 1:nfilters - basis[i, :] = cos(i * samples) + for j in 1:length(samples) + basis[i, j] = cos(i * samples[j]) + end end basis *= sqrt(2f0/ninput)