diff --git a/src/feat/feature-mfcc-test.cc b/src/feat/feature-mfcc-test.cc index c4367139707..e81458741ef 100644 --- a/src/feat/feature-mfcc-test.cc +++ b/src/feat/feature-mfcc-test.cc @@ -95,8 +95,8 @@ static void UnitTestSimple() { op.frame_opts.round_to_power_of_two = true; op.mel_opts.low_freq = 0.0; op.mel_opts.htk_mode = true; + op.mel_opts.modified = (Rand() % 2 == 0 ? true : false); op.htk_compat = true; - Mfcc mfcc(op); // use default parameters @@ -613,42 +613,29 @@ static void UnitTestHTKCompare6() { } std::cout << "Test passed :)\n\n"; - + unlink("tmp.test.wav.fea_kaldi.6"); } void UnitTestVtln() { // Test the function VtlnWarpFreq. - BaseFloat low_freq = 10, high_freq = 7800, - vtln_low_cutoff = 20, vtln_high_cutoff = 7400; - + BaseFloat low_freq = 10, high_freq = 7800; + MelBanksOptions mel_opts; + mel_opts.low_freq = low_freq, mel_opts.high_freq = high_freq; + FrameExtractionOptions frame_opts; + MelBanks melfbank(mel_opts, frame_opts, 0.9); for (size_t i = 0; i < 100; i++) { BaseFloat freq = 5000, warp_factor = 0.9 + RandUniform() * 0.2; - AssertEqual(MelBanks::VtlnWarpFreq(vtln_low_cutoff, vtln_high_cutoff, - low_freq, high_freq, warp_factor, - freq), - freq / warp_factor); - - AssertEqual(MelBanks::VtlnWarpFreq(vtln_low_cutoff, vtln_high_cutoff, - low_freq, high_freq, warp_factor, - low_freq), - low_freq); - AssertEqual(MelBanks::VtlnWarpFreq(vtln_low_cutoff, vtln_high_cutoff, - low_freq, high_freq, warp_factor, - high_freq), - high_freq); + AssertEqual(melfbank.VtlnWarpFreq(warp_factor, freq), freq / warp_factor); + + AssertEqual(melfbank.VtlnWarpFreq(warp_factor, low_freq), low_freq); + AssertEqual(melfbank.VtlnWarpFreq(warp_factor, high_freq), high_freq); BaseFloat freq2 = low_freq + (high_freq-low_freq) * RandUniform(), freq3 = freq2 + (high_freq-freq2) * RandUniform(); // freq3>=freq2 - BaseFloat w2 = MelBanks::VtlnWarpFreq(vtln_low_cutoff, vtln_high_cutoff, - low_freq, high_freq, warp_factor, - freq2); - BaseFloat w3 = 
MelBanks::VtlnWarpFreq(vtln_low_cutoff, vtln_high_cutoff, - low_freq, high_freq, warp_factor, - freq3); + BaseFloat w2 = melfbank.VtlnWarpFreq(warp_factor, freq2); + BaseFloat w3 = melfbank.VtlnWarpFreq(warp_factor, freq3); KALDI_ASSERT(w3 >= w2); // increasing function. - BaseFloat w3dash = MelBanks::VtlnWarpFreq(vtln_low_cutoff, vtln_high_cutoff, - low_freq, high_freq, 1.0, - freq3); + BaseFloat w3dash = melfbank.VtlnWarpFreq(1.0, freq3); AssertEqual(w3dash, freq3); } } diff --git a/src/feat/mel-computations.cc b/src/feat/mel-computations.cc index 810b6247e93..bf1563434eb 100644 --- a/src/feat/mel-computations.cc +++ b/src/feat/mel-computations.cc @@ -34,78 +34,57 @@ MelBanks::MelBanks(const MelBanksOptions &opts, const FrameExtractionOptions &frame_opts, BaseFloat vtln_warp_factor): htk_mode_(opts.htk_mode) { + SetConfigs(opts, frame_opts, vtln_warp_factor); + int32 num_bins = opts.num_bins; if (num_bins < 3) KALDI_ERR << "Must have at least 3 mel bins"; - BaseFloat sample_freq = frame_opts.samp_freq; - int32 window_length_padded = frame_opts.PaddedWindowSize(); - KALDI_ASSERT(window_length_padded % 2 == 0); - int32 num_fft_bins = window_length_padded / 2; - BaseFloat nyquist = 0.5 * sample_freq; - BaseFloat low_freq = opts.low_freq, high_freq; - if (opts.high_freq > 0.0) - high_freq = opts.high_freq; - else - high_freq = nyquist + opts.high_freq; - if (low_freq < 0.0 || low_freq >= nyquist - || high_freq <= 0.0 || high_freq > nyquist - || high_freq <= low_freq) - KALDI_ERR << "Bad values in options: low-freq " << low_freq - << " and high-freq " << high_freq << " vs. 
nyquist " - << nyquist; - - BaseFloat fft_bin_width = sample_freq / window_length_padded; - // fft-bin width [think of it as Nyquist-freq / half-window-length] + BaseFloat mel_low_freq = MelScale(low_freq_); + BaseFloat mel_high_freq = MelScale(high_freq_); - BaseFloat mel_low_freq = MelScale(low_freq); - BaseFloat mel_high_freq = MelScale(high_freq); - debug_ = opts.debug_mel; - // divide by num_bins+1 in next line because of end-effects where the bins - // spread out to the sides. - BaseFloat mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins+1); + bins_.resize(num_bins); + center_freqs_.Resize(num_bins); - BaseFloat vtln_low = opts.vtln_low, - vtln_high = opts.vtln_high; - if (vtln_high < 0.0) { - vtln_high += nyquist; + for (int32 bin = 0; bin < num_bins; bin++) { + BaseFloat mel = mel_low_freq + + (bin + 1) * (mel_high_freq - mel_low_freq) / (num_bins + 1); + if (vtln_warp_factor != 1.0) + mel = VtlnWarpMelFreq(vtln_warp_factor, mel); + center_freqs_(bin) = InverseMelScale(mel); } - if (vtln_warp_factor != 1.0 && - (vtln_low < 0.0 || vtln_low <= low_freq - || vtln_low >= high_freq - || vtln_high <= 0.0 || vtln_high >= high_freq - || vtln_high <= vtln_low)) - KALDI_ERR << "Bad values in options: vtln-low " << vtln_low - << " and vtln-high " << vtln_high << ", versus " - << "low-freq " << low_freq << " and high-freq " - << high_freq; + if (!opts.modified) + ComputeBins(opts.htk_mode); + else + ComputeModifiedBins(); - bins_.resize(num_bins); - center_freqs_.Resize(num_bins); + if (debug_) { + for (size_t i = 0; i < bins_.size(); i++) { + KALDI_LOG << "bin " << i << ", offset = " << bins_[i].first + << ", vec = " << bins_[i].second; + } + } +} +void MelBanks::ComputeBins(bool htk_mode) { + int32 num_bins = center_freqs_.Dim(); for (int32 bin = 0; bin < num_bins; bin++) { - BaseFloat left_mel = mel_low_freq + bin * mel_freq_delta, - center_mel = mel_low_freq + (bin + 1) * mel_freq_delta, - right_mel = mel_low_freq + (bin + 2) * mel_freq_delta; - - if 
(vtln_warp_factor != 1.0) { - left_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq, - vtln_warp_factor, left_mel); - center_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq, - vtln_warp_factor, center_mel); - right_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq, - vtln_warp_factor, right_mel); - } - center_freqs_(bin) = InverseMelScale(center_mel); + // center_mel is the center frequency (in mel) of this bin, and left_mel and + // right_mel are those of the bins immediately to the left and right. + BaseFloat center_mel = MelScale(center_freqs_(bin)), + left_mel = MelScale(bin == 0 ? + low_freq_ : center_freqs_(bin - 1)), + right_mel = MelScale(bin == num_bins - 1 ? + high_freq_ : center_freqs_(bin + 1)); // this_bin will be a vector of coefficients that is only // nonzero where this mel bin is active. - Vector this_bin(num_fft_bins); + Vector this_bin(num_fft_bins_); int32 first_index = -1, last_index = -1; - for (int32 i = 0; i < num_fft_bins; i++) { - BaseFloat freq = (fft_bin_width * i); // Center frequency of this fft + for (int32 i = 0; i < num_fft_bins_; i++) { + BaseFloat freq = (fft_bin_width_ * i); // Center frequency of this fft // bin. BaseFloat mel = MelScale(freq); if (mel > left_mel && mel < right_mel) { @@ -113,7 +92,7 @@ MelBanks::MelBanks(const MelBanksOptions &opts, if (mel <= center_mel) weight = (mel - left_mel) / (center_mel - left_mel); else - weight = (right_mel-mel) / (right_mel-center_mel); + weight = (right_mel - mel) / (right_mel - center_mel); this_bin(i) = weight; if (first_index == -1) first_index = i; @@ -129,29 +108,93 @@ MelBanks::MelBanks(const MelBanksOptions &opts, bins_[bin].second.CopyFromVec(this_bin.Range(first_index, size)); // Replicate a bug in HTK, for testing purposes. 
- if (opts.htk_mode && bin == 0 && mel_low_freq != 0.0) + if (htk_mode && bin == 0 && low_freq_ != 0.0) bins_[bin].second(0) = 0.0; - } - if (debug_) { - for (size_t i = 0; i < bins_.size(); i++) { - KALDI_LOG << "bin " << i << ", offset = " << bins_[i].first - << ", vec = " << bins_[i].second; +} + +/* + Notes on the shape of the modified bins. + + They are shaped like a cosine function from -pi/2 to pi/2 (unlike the standard + triangular bins). We define their diameter as the distance between the + first and last nonzero value (pi for the canonical function). We choose + the diameter as: + d = sqrt(d1^2 + d2^2) + (this function may be viewed as a kind of soft-max), where d1 and d2 are + two different formulas for the diameter that we describe below. + + d1 is a formula that ensures the bins overlap by at least a minimal amount. + + Let bin_diff be the difference in Hz between this bin's center-frequency + and the next bin's center-frequency, or (if this is the last bin), + the user-specified `high-freq` which is the top of the range of frequencies + we cover. Then: + + d1 = 1.1 * bin_diff + + The formula for d2 is designed to provide a reasonable floor so the bandwidths + don't get ridiculously narrow as we add more bins, and to approximate what we + observed the filter diameters to look like when learning filterbanks via DNNs. + The formula is: + + d2 = 60 + 50 * f / (f + 300) + + (300 being breakpoint_), which roughly means: start with a diameter of 60Hz, + increasing gradually towards 110Hz for bins with center frequency well above + 300Hz. There is no rocket science behind this formula; it was obtained through + a combination of trying to match the DNN-learned filterbank bandwidths (cite: + Pegah's thesis), and manual tuning. + */ +void MelBanks::ComputeModifiedBins() { + int32 num_bins = center_freqs_.Dim(); + for (int32 bin = 0; bin < num_bins; bin++) { + BaseFloat center_freq = center_freqs_(bin), + next_center = (bin == num_bins - 1 ? 
+ high_freq_ : center_freqs_(bin + 1)); + + BaseFloat d1 = (next_center - center_freq) * 1.1, + d2 = 60.0 + 50.0 * (center_freq / (center_freq + breakpoint_)); + + // 'diameter' is in Hz; it represents the distance on the frequency axis + // between the first and last nonzero points of the raised-cosine window + // function. This formula applies our heuristic, described above, + // to choose the diameter. + BaseFloat diameter = sqrt(d1 * d1 + d2 * d2); + + // 'freq_scale' is the scaling factor on the frequencies that will ensure + // that the diameter becomes equal to pi, like the canonical bin function + // (the cosine from -pi/2 to pi/2). + BaseFloat freq_scale = M_PI / diameter; + + // this_bin will be a vector of coefficients that is only + // nonzero where this mel bin is active. + Vector this_bin(num_fft_bins_); + int32 first_index = -1, last_index = -1; + + for (int32 i = 0; i < num_fft_bins_; i++) { + BaseFloat freq = (fft_bin_width_ * i); // Center frequency of this fft + // bin. + BaseFloat normalized_freq = freq_scale * (freq - center_freq); + if (normalized_freq > -M_PI_2 && normalized_freq < M_PI_2) { + BaseFloat weight = cos(normalized_freq); + this_bin(i) = weight; + if (first_index == -1) + first_index = i; + last_index = i; + } } + KALDI_ASSERT(first_index != -1 && last_index >= first_index + && "You may have set --num-mel-bins too large."); + + bins_[bin].first = first_index; + int32 size = last_index + 1 - first_index; + bins_[bin].second.Resize(size); + bins_[bin].second.CopyFromVec(this_bin.Range(first_index, size)); } } -MelBanks::MelBanks(const MelBanks &other): - center_freqs_(other.center_freqs_), - bins_(other.bins_), - debug_(other.debug_), - htk_mode_(other.htk_mode_) { } - -BaseFloat MelBanks::VtlnWarpFreq(BaseFloat vtln_low_cutoff, // upper+lower frequency cutoffs for VTLN. 
- BaseFloat vtln_high_cutoff, - BaseFloat low_freq, // upper+lower frequency cutoffs in mel computation - BaseFloat high_freq, - BaseFloat vtln_warp_factor, +BaseFloat MelBanks::VtlnWarpFreq(BaseFloat vtln_warp_factor, BaseFloat freq) { /// This computes a VTLN warping function that is not the same as HTK's one, /// but has similar inputs (this function has the advantage of never producing @@ -180,45 +223,34 @@ BaseFloat MelBanks::VtlnWarpFreq(BaseFloat vtln_low_cutoff, // upper+lower freq /// = vtln_low_cutoff * max(1, vtln_warp_factor) - if (freq < low_freq || freq > high_freq) return freq; // in case this gets called + if (freq < low_freq_ || freq > high_freq_) return freq; // in case this gets called // for out-of-range frequencies, just return the freq. - KALDI_ASSERT(vtln_low_cutoff > low_freq && - "be sure to set the --vtln-low option higher than --low-freq"); - KALDI_ASSERT(vtln_high_cutoff < high_freq && - "be sure to set the --vtln-high option lower than --high-freq [or negative]"); - BaseFloat one = 1.0; - BaseFloat l = vtln_low_cutoff * std::max(one, vtln_warp_factor); - BaseFloat h = vtln_high_cutoff * std::min(one, vtln_warp_factor); + BaseFloat l = vtln_low_ * std::max(BaseFloat(1.0), vtln_warp_factor); + BaseFloat h = vtln_high_ * std::min(BaseFloat(1.0), vtln_warp_factor); BaseFloat scale = 1.0 / vtln_warp_factor; BaseFloat Fl = scale * l; // F(l); BaseFloat Fh = scale * h; // F(h); - KALDI_ASSERT(l > low_freq && h < high_freq); + KALDI_ASSERT(l > low_freq_ && h < high_freq_); // slope of left part of the 3-piece linear function - BaseFloat scale_left = (Fl - low_freq) / (l - low_freq); + BaseFloat scale_left = (Fl - low_freq_) / (l - low_freq_); // [slope of center part is just "scale"] // slope of right part of the 3-piece linear function - BaseFloat scale_right = (high_freq - Fh) / (high_freq - h); + BaseFloat scale_right = (high_freq_ - Fh) / (high_freq_ - h); if (freq < l) { - return low_freq + scale_left * (freq - low_freq); + return 
low_freq_ + scale_left * (freq - low_freq_); } else if (freq < h) { return scale * freq; } else { // freq >= h - return high_freq + scale_right * (freq - high_freq); + return high_freq_ + scale_right * (freq - high_freq_); } } -BaseFloat MelBanks::VtlnWarpMelFreq(BaseFloat vtln_low_cutoff, // upper+lower frequency cutoffs for VTLN. - BaseFloat vtln_high_cutoff, - BaseFloat low_freq, // upper+lower frequency cutoffs in mel computation - BaseFloat high_freq, - BaseFloat vtln_warp_factor, +BaseFloat MelBanks::VtlnWarpMelFreq(BaseFloat vtln_warp_factor, BaseFloat mel_freq) { - return MelScale(VtlnWarpFreq(vtln_low_cutoff, vtln_high_cutoff, - low_freq, high_freq, - vtln_warp_factor, InverseMelScale(mel_freq))); + return MelScale(VtlnWarpFreq(vtln_warp_factor, InverseMelScale(mel_freq))); } @@ -241,15 +273,55 @@ void MelBanks::Compute(const VectorBase &power_spectrum, // it early. KALDI_ASSERT(!KALDI_ISNAN((*mel_energies_out)(i))); } +} - if (debug_) { - fprintf(stderr, "MEL BANKS:\n"); - for (int32 i = 0; i < num_bins; i++) - fprintf(stderr, " %f", (*mel_energies_out)(i)); - fprintf(stderr, "\n"); - } +void MelBanks::SetConfigs(const MelBanksOptions &opts, + const FrameExtractionOptions &frame_opts, + BaseFloat vtln_warp_factor) { + BaseFloat sample_freq = frame_opts.samp_freq, + nyquist = 0.5 * sample_freq; + int32 window_length_padded = frame_opts.PaddedWindowSize(); + KALDI_ASSERT(window_length_padded % 2 == 0); + num_fft_bins_ = window_length_padded / 2; + // fft-bin width [think of it as Nyquist-freq / half-window-length] + fft_bin_width_ = sample_freq / window_length_padded; + + debug_ = opts.debug_mel; + + + low_freq_ = opts.low_freq; + if (opts.high_freq > 0.0) + high_freq_ = opts.high_freq; + else + high_freq_ = nyquist + opts.high_freq; + + if (low_freq_ < 0.0 || low_freq_ >= nyquist + || high_freq_ <= 0.0 || high_freq_ > nyquist + || high_freq_ <= low_freq_) + KALDI_ERR << "Bad values in options: low-freq " << low_freq_ + << " and high-freq " << high_freq_ << 
" vs. nyquist " + << nyquist; + + breakpoint_ = (opts.modified ? 300.0 : 700.0); + second_breakpoint_ = (opts.modified ? 2000.0 : -1); + vtln_low_ = opts.vtln_low; + if (opts.vtln_high > 0.0) + vtln_high_ = opts.vtln_high; + else + vtln_high_ = opts.vtln_high + nyquist; + + if (vtln_warp_factor != 1.0 && + (vtln_low_ < 0.0 || vtln_low_ <= low_freq_ + || vtln_low_ >= high_freq_ + || vtln_high_ <= 0.0 || vtln_high_ >= high_freq_ + || vtln_high_ <= vtln_low_)) + KALDI_ERR << "Bad values in options: vtln-low " << vtln_low_ + << " and vtln-high " << vtln_high_ << ", versus " + << "low-freq " << low_freq_ << " and high-freq " + << high_freq_; } + void ComputeLifterCoeffs(BaseFloat Q, VectorBase *coeffs) { // Compute liftering coefficients (scaling on cepstral coeffs) // coeffs are numbered slightly differently from HTK: the zeroth diff --git a/src/feat/mel-computations.h b/src/feat/mel-computations.h index 3659628cdfb..3231671fa8b 100644 --- a/src/feat/mel-computations.h +++ b/src/feat/mel-computations.h @@ -48,6 +48,7 @@ struct MelBanksOptions { BaseFloat vtln_low; // vtln lower cutoff of warping function. BaseFloat vtln_high; // vtln upper cutoff of warping function: if negative, added // to the Nyquist frequency to get the cutoff. + bool modified; // If true, use 'modified' MFCC. bool debug_mel; // htk_mode is a "hidden" config, it does not show up on command line. // Enables more exact compatibibility with HTK, for testing purposes. 
Affects @@ -55,7 +56,7 @@ struct MelBanksOptions { bool htk_mode; explicit MelBanksOptions(int num_bins = 25) : num_bins(num_bins), low_freq(20), high_freq(0), vtln_low(100), - vtln_high(-500), debug_mel(false), htk_mode(false) {} + vtln_high(-500), modified(false), debug_mel(false), htk_mode(false) {} void Register(OptionsItf *opts) { opts->Register("num-mel-bins", &num_bins, @@ -69,6 +70,10 @@ struct MelBanksOptions { opts->Register("vtln-high", &vtln_high, "High inflection point in piecewise linear VTLN warping function" " (if negative, offset from high-mel-freq"); + opts->Register("modified", &modified, + "If true, use a modified form of the Mel scale that gives " + "more emphasis to lower frequencies, and use differently " + "tuned bin shapes and widths than normal."); opts->Register("debug-mel", &debug_mel, "Print out debugging information for mel bin computation"); } @@ -78,30 +83,6 @@ struct MelBanksOptions { class MelBanks { public: - static inline BaseFloat InverseMelScale(BaseFloat mel_freq) { - return 700.0f * (expf (mel_freq / 1127.0f) - 1.0f); - } - - static inline BaseFloat MelScale(BaseFloat freq) { - return 1127.0f * logf (1.0f + freq / 700.0f); - } - - static BaseFloat VtlnWarpFreq(BaseFloat vtln_low_cutoff, - BaseFloat vtln_high_cutoff, // discontinuities in warp func - BaseFloat low_freq, - BaseFloat high_freq, // upper+lower frequency cutoffs in - // the mel computation - BaseFloat vtln_warp_factor, - BaseFloat freq); - - static BaseFloat VtlnWarpMelFreq(BaseFloat vtln_low_cutoff, - BaseFloat vtln_high_cutoff, - BaseFloat low_freq, - BaseFloat high_freq, - BaseFloat vtln_warp_factor, - BaseFloat mel_freq); - - MelBanks(const MelBanksOptions &opts, const FrameExtractionOptions &frame_opts, BaseFloat vtln_warp_factor); @@ -116,22 +97,90 @@ class MelBanks { // returns vector of central freq of each bin; needed by plp code. 
const Vector &GetCenterFreqs() const { return center_freqs_; } - const std::vector > >& GetBins() const { - return bins_; - } + BaseFloat VtlnWarpFreq(BaseFloat vtln_warp_factor, BaseFloat freq); + - // Copy constructor - MelBanks(const MelBanks &other); + BaseFloat VtlnWarpMelFreq(BaseFloat vtln_warp_factor, BaseFloat mel_freq); + + // Use the default copy constructor private: + + // This function checks that the provided options make sense, and also sets + // configuration variables like breakpoint_ in this class. + void SetConfigs(const MelBanksOptions &opts, + const FrameExtractionOptions &frame_opts, + BaseFloat vtln_warp_factor); + + inline BaseFloat InverseMelScale(BaseFloat mel_freq) { + BaseFloat b1 = breakpoint_, b2 = second_breakpoint_; + if (b2 > 0.0) // modified Mel scale + return b2 * (expf((expf(mel_freq) - b1) / b2) - 1.0); + else + return b1 * (expf(mel_freq) - 1.0); + } + + inline BaseFloat MelScale(BaseFloat freq) { + BaseFloat b1 = breakpoint_, b2 = second_breakpoint_; + if (b2 > 0.0) { + // Modified Mel: linear, till ~b1, then log till ~b2, then log(log) + return log (b1 + b2 * log(1.0 + freq / b2)); + } else { + // Mel: linear till ~b1 = 700, then logarithmic. We ignore the scaling + // factor as it makes no difference to our application. + return log(1.0 + freq / b1); + } + } + + // This sets up the 'bins_' member, for the regular (not modified) + // computation. It assumes center_freqs_ is already set up. + // 'htk_mode' is expected to be a copy of opts.htk_mode as given to the + // constructor. + void ComputeBins(bool htk_mode); + + // This sets up the 'bins_' member, for the modified computation + // with cosine-shaped bins (as opposed to the triangular bins of the + // regular computation). It assumes center_freqs_ is already set up. + // Unlike ComputeBins(), it takes no 'htk_mode' argument; the HTK-bug + // replication is not applied to the modified bins. 
+ void ComputeModifiedBins(); + // Disallow assignment MelBanks &operator = (const MelBanks &other); - // center frequencies of bins, numbered from 0 ... num_bins-1. - // Needed by GetCenterFreqs(). + + + // The following few variables are derived from the configuration + // options passed in; they are used in converting to and from Mel frequencies, + // and for other purposes. + BaseFloat breakpoint_; // The breakpoint of the Mel scale (700) if we + // are using mel scale; otherwise the first + // breakpoint in the modified-mel scale, + // e.g. 300 (used if --modified=true). + BaseFloat second_breakpoint_; // The second breakpoint used in the modified + // mel scale, e.g. 2000. + // Only relevant if --modified=true; else -1. + + BaseFloat low_freq_; // opts.low_freq + BaseFloat high_freq_; // The same as opts.high_freq if it's > 0, or + // otherwise the Nyquist plus opts.high_freq. + BaseFloat vtln_low_; // opts.vtln_low; the lower cutoff for VTLN. + BaseFloat vtln_high_; // opts.vtln_high (plus the Nyquist if it's <= 0); the upper cutoff for VTLN. + + int32 num_fft_bins_; // The number of FFT frequency bins (actually, excluding + // the one at the Nyquist). Equal to half the padded + // window length. + BaseFloat fft_bin_width_; // The frequency separation between successive + // FFT bins: equal to nyquist / num_fft_bins_. + + + // center frequencies of bins (in Hz), numbered from 0 ... num_bins-1. Needed + // by GetCenterFreqs(). Vector center_freqs_; - // the "bins_" vector is a vector, one for each bin, of a pair: - // (the first nonzero fft-bin), (the vector of weights). + // the "bins_" vector is a vector, one for each mel bin, of a pair: (the + // first nonzero fft-bin), (the vector of weights). The pair of (int32, + // Vector) is provided for efficiency, to avoid having a larger vector with + // many zero entries. std::vector > > bins_; bool debug_;