From 232df9f2942a802edda9a894b11cfa015f6788eb Mon Sep 17 00:00:00 2001
From: Pegita <pegahgh@gmail.com>
Date: Wed, 12 Dec 2018 01:07:17 -0500
Subject: [PATCH 1/9] Add modified MFCC features based on DNN-c and fDNN-c
 features; they are activated with the --modified option.

---
 src/feat/mel-computations.cc | 248 +++++++++++++++++++++--------------
 src/feat/mel-computations.h  |  98 +++++++++-----
 2 files changed, 220 insertions(+), 126 deletions(-)

diff --git a/src/feat/mel-computations.cc b/src/feat/mel-computations.cc
index 810b6247e93..bd40a527b29 100644
--- a/src/feat/mel-computations.cc
+++ b/src/feat/mel-computations.cc
@@ -34,78 +34,57 @@ MelBanks::MelBanks(const MelBanksOptions &opts,
                    const FrameExtractionOptions &frame_opts,
                    BaseFloat vtln_warp_factor):
     htk_mode_(opts.htk_mode) {
+  SetConfigs(opts, frame_opts, vtln_warp_factor);
+
   int32 num_bins = opts.num_bins;
   if (num_bins < 3) KALDI_ERR << "Must have at least 3 mel bins";
-  BaseFloat sample_freq = frame_opts.samp_freq;
-  int32 window_length_padded = frame_opts.PaddedWindowSize();
-  KALDI_ASSERT(window_length_padded % 2 == 0);
-  int32 num_fft_bins = window_length_padded / 2;
-  BaseFloat nyquist = 0.5 * sample_freq;
 
-  BaseFloat low_freq = opts.low_freq, high_freq;
-  if (opts.high_freq > 0.0)
-    high_freq = opts.high_freq;
-  else
-    high_freq = nyquist + opts.high_freq;
 
-  if (low_freq < 0.0 || low_freq >= nyquist
-      || high_freq <= 0.0 || high_freq > nyquist
-      || high_freq <= low_freq)
-    KALDI_ERR << "Bad values in options: low-freq " << low_freq
-              << " and high-freq " << high_freq << " vs. nyquist "
-              << nyquist;
-
-  BaseFloat fft_bin_width = sample_freq / window_length_padded;
-  // fft-bin width [think of it as Nyquist-freq / half-window-length]
+  BaseFloat mel_low_freq = MelScale(low_freq_);
+  BaseFloat mel_high_freq = MelScale(high_freq_);
 
-  BaseFloat mel_low_freq = MelScale(low_freq);
-  BaseFloat mel_high_freq = MelScale(high_freq);
 
-  debug_ = opts.debug_mel;
 
-  // divide by num_bins+1 in next line because of end-effects where the bins
-  // spread out to the sides.
-  BaseFloat mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins+1);
+  bins_.resize(num_bins);
+  center_freqs_.Resize(num_bins);
 
-  BaseFloat vtln_low = opts.vtln_low,
-      vtln_high = opts.vtln_high;
-  if (vtln_high < 0.0) {
-    vtln_high += nyquist;
+  for (int32 bin = 0; bin < num_bins; bin++) {
+    BaseFloat mel = mel_low_freq +
+        (bin + 1) * (mel_high_freq - mel_low_freq) / (num_bins + 1);
+    if (vtln_warp_factor != 1.0)
+      mel = VtlnWarpMelFreq(vtln_warp_factor, mel);
+    center_freqs_(bin) = InverseMelScale(mel);
   }
 
-  if (vtln_warp_factor != 1.0 &&
-      (vtln_low < 0.0 || vtln_low <= low_freq
-       || vtln_low >= high_freq
-       || vtln_high <= 0.0 || vtln_high >= high_freq
-       || vtln_high <= vtln_low))
-    KALDI_ERR << "Bad values in options: vtln-low " << vtln_low
-              << " and vtln-high " << vtln_high << ", versus "
-              << "low-freq " << low_freq << " and high-freq "
-              << high_freq;
+  if (!opts.modified)
+    ComputeBins(opts.htk_mode);
+  else
+    ComputeModifiedBins();
 
-  bins_.resize(num_bins);
-  center_freqs_.Resize(num_bins);
+  if (debug_) {
+    for (size_t i = 0; i < bins_.size(); i++) {
+      KALDI_LOG << "bin " << i << ", offset = " << bins_[i].first
+                << ", vec = " << bins_[i].second;
+    }
+  }
+}
 
+void MelBanks::ComputeBins(bool htk_mode) {
+  int32 num_bins = center_freqs_.Dim();
   for (int32 bin = 0; bin < num_bins; bin++) {
-    BaseFloat left_mel = mel_low_freq + bin * mel_freq_delta,
-        center_mel = mel_low_freq + (bin + 1) * mel_freq_delta,
-        right_mel = mel_low_freq + (bin + 2) * mel_freq_delta;
-
-    if (vtln_warp_factor != 1.0) {
-      left_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
-                                 vtln_warp_factor, left_mel);
-      center_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
-                                 vtln_warp_factor, center_mel);
-      right_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
-                                  vtln_warp_factor, right_mel);
-    }
-    center_freqs_(bin) = InverseMelScale(center_mel);
+    // center_mel is the center frequency (in mel) of this bin, and left_mel and
+    // right_mel are those of the bins immediately to the left and right.
+    BaseFloat center_mel = MelScale(center_freqs_(bin)),
+        left_mel = MelScale(bin == 0 ?
+                            low_freq_ : center_freqs_(bin - 1)),
+        right_mel = MelScale(bin == num_bins - 1 ?
+                             high_freq_ : center_freqs_(bin + 1));
     // this_bin will be a vector of coefficients that is only
     // nonzero where this mel bin is active.
-    Vector<BaseFloat> this_bin(num_fft_bins);
+    Vector<BaseFloat> this_bin(num_fft_bins_);
     int32 first_index = -1, last_index = -1;
-    for (int32 i = 0; i < num_fft_bins; i++) {
-      BaseFloat freq = (fft_bin_width * i);  // Center frequency of this fft
+    for (int32 i = 0; i < num_fft_bins_; i++) {
+      BaseFloat freq = (fft_bin_width_ * i);  // Center frequency of this fft
                                              // bin.
       BaseFloat mel = MelScale(freq);
       if (mel > left_mel && mel < right_mel) {
@@ -113,7 +92,7 @@ MelBanks::MelBanks(const MelBanksOptions &opts,
         if (mel <= center_mel)
           weight = (mel - left_mel) / (center_mel - left_mel);
         else
-         weight = (right_mel-mel) / (right_mel-center_mel);
+         weight = (right_mel-mel) / (right_mel - center_mel);
         this_bin(i) = weight;
         if (first_index == -1)
           first_index = i;
@@ -129,29 +108,73 @@ MelBanks::MelBanks(const MelBanksOptions &opts,
     bins_[bin].second.CopyFromVec(this_bin.Range(first_index, size));
 
     // Replicate a bug in HTK, for testing purposes.
-    if (opts.htk_mode && bin == 0 && mel_low_freq != 0.0)
+    if (htk_mode && bin == 0 && low_freq_ != 0.0)
       bins_[bin].second(0) = 0.0;
-
   }
-  if (debug_) {
-    for (size_t i = 0; i < bins_.size(); i++) {
-      KALDI_LOG << "bin " << i << ", offset = " << bins_[i].first
-                << ", vec = " << bins_[i].second;
+}
+
+/*
+  Notes on the shape of the modified bins.
+
+  They are shaped like a cosine function from -pi/2 to pi/2 (unlike the standard
+  triangular bins).  We define their diameter as the distance between the
+  first and last nonzero value (pi for the canonical function).  If there are
+  a lot of bins, their diamter is defined by a formula and it's a function of
+  the center frequency f of the bin:
+     diameter = 30 + 60 f / (f + 500).
+  so it increases from 30Hz to 90Hz with a knee around 500Hz.
+  However (and this matters if the number of bins is relatively small), we never
+  let the diameter fall below the point where the crossing-point of this and
+  the next bin would be less than 0.1.  By this I mean is the y-value where the
+  raised-cosines cross.  This value ensures that there won't be too a 'dip'
+  in the middle of the two bins.
+ */
+void MelBanks::ComputeModifiedBins() {
+  int32 num_bins = center_freqs_.Dim();
+  for (int32 bin = 0; bin < num_bins; bin++) {
+    BaseFloat center_freq = center_freqs_(bin),
+        next_center = (bin == num_bins - 1 ?
+                       high_freq_ : center_freqs_(bin + 1));
+
+    // note: breakpoint_ is 500 (Hz).
+    BaseFloat diameter_floor = (next_center - center_freq) * 1.1,
+        diameter = 30.0 + 60.0 * (center_freq / (center_freq + breakpoint_));
+
+    diameter = pow(diameter * diameter + diameter_floor * diameter_floor, 0.5);
+
+    // 'freq_scale' is the scaling factor on the frequencies that will ensure
+    // that the diameter becomes equal to pi, like the canonical bin function
+    // (the cosine from -pi/2 to pi/2).
+    BaseFloat freq_scale = M_PI / diameter;
+
+    // this_bin will be a vector of coefficients that is only
+    // nonzero where this mel bin is active.
+    Vector<BaseFloat> this_bin(num_fft_bins_);
+    int32 first_index = -1, last_index = -1;
+
+    for (int32 i = 0; i < num_fft_bins_; i++) {
+      BaseFloat freq = (fft_bin_width_ * i);  // Center frequency of this fft
+                                             // bin.
+      BaseFloat normalized_freq = freq_scale * (freq - center_freq);
+      if (normalized_freq > -M_PI_2 && normalized_freq < M_PI_2) {
+        BaseFloat weight = cos(normalized_freq);
+        this_bin(i) = weight;
+        if (first_index == -1)
+          first_index = i;
+        last_index = i;
+      }
     }
+    KALDI_ASSERT(first_index != -1 && last_index >= first_index
+                 && "You may have set --num-mel-bins too large.");
+
+    bins_[bin].first = first_index;
+    int32 size = last_index + 1 - first_index;
+    bins_[bin].second.Resize(size);
+    bins_[bin].second.CopyFromVec(this_bin.Range(first_index, size));
   }
 }
 
-MelBanks::MelBanks(const MelBanks &other):
-    center_freqs_(other.center_freqs_),
-    bins_(other.bins_),
-    debug_(other.debug_),
-    htk_mode_(other.htk_mode_) { }
-
-BaseFloat MelBanks::VtlnWarpFreq(BaseFloat vtln_low_cutoff,  // upper+lower frequency cutoffs for VTLN.
-                                 BaseFloat vtln_high_cutoff,
-                                 BaseFloat low_freq,  // upper+lower frequency cutoffs in mel computation
-                                 BaseFloat high_freq,
-                                 BaseFloat vtln_warp_factor,
+BaseFloat MelBanks::VtlnWarpFreq(BaseFloat vtln_warp_factor,
                                  BaseFloat freq) {
   /// This computes a VTLN warping function that is not the same as HTK's one,
   /// but has similar inputs (this function has the advantage of never producing
@@ -180,45 +203,34 @@ BaseFloat MelBanks::VtlnWarpFreq(BaseFloat vtln_low_cutoff,  // upper+lower freq
   ///                       = vtln_low_cutoff * max(1, vtln_warp_factor)
 
 
-  if (freq < low_freq || freq > high_freq) return freq;  // in case this gets called
+  if (freq < low_freq_ || freq > high_freq_) return freq;  // in case this gets called
   // for out-of-range frequencies, just return the freq.
 
-  KALDI_ASSERT(vtln_low_cutoff > low_freq &&
-               "be sure to set the --vtln-low option higher than --low-freq");
-  KALDI_ASSERT(vtln_high_cutoff < high_freq &&
-               "be sure to set the --vtln-high option lower than --high-freq [or negative]");
-  BaseFloat one = 1.0;
-  BaseFloat l = vtln_low_cutoff * std::max(one, vtln_warp_factor);
-  BaseFloat h = vtln_high_cutoff * std::min(one, vtln_warp_factor);
+  BaseFloat l = vtln_low_ * std::max(BaseFloat(1.0), vtln_warp_factor);
+  BaseFloat h = vtln_high_ * std::min(BaseFloat(1.0), vtln_warp_factor);
   BaseFloat scale = 1.0 / vtln_warp_factor;
   BaseFloat Fl = scale * l;  // F(l);
   BaseFloat Fh = scale * h;  // F(h);
-  KALDI_ASSERT(l > low_freq && h < high_freq);
+  KALDI_ASSERT(l > low_freq_ && h < high_freq_);
   // slope of left part of the 3-piece linear function
-  BaseFloat scale_left = (Fl - low_freq) / (l - low_freq);
+  BaseFloat scale_left = (Fl - low_freq_) / (l - low_freq_);
   // [slope of center part is just "scale"]
 
   // slope of right part of the 3-piece linear function
-  BaseFloat scale_right = (high_freq - Fh) / (high_freq - h);
+  BaseFloat scale_right = (high_freq_ - Fh) / (high_freq_ - h);
 
   if (freq < l) {
-    return low_freq + scale_left * (freq - low_freq);
+    return low_freq_ + scale_left * (freq - low_freq_);
   } else if (freq < h) {
     return scale * freq;
   } else {  // freq >= h
-    return high_freq + scale_right * (freq - high_freq);
+    return high_freq_ + scale_right * (freq - high_freq_);
   }
 }
 
-BaseFloat MelBanks::VtlnWarpMelFreq(BaseFloat vtln_low_cutoff,  // upper+lower frequency cutoffs for VTLN.
-                                    BaseFloat vtln_high_cutoff,
-                                    BaseFloat low_freq,  // upper+lower frequency cutoffs in mel computation
-                                    BaseFloat high_freq,
-                                    BaseFloat vtln_warp_factor,
+BaseFloat MelBanks::VtlnWarpMelFreq(BaseFloat vtln_warp_factor,
                                     BaseFloat mel_freq) {
-  return MelScale(VtlnWarpFreq(vtln_low_cutoff, vtln_high_cutoff,
-                               low_freq, high_freq,
-                               vtln_warp_factor, InverseMelScale(mel_freq)));
+  return MelScale(VtlnWarpFreq(vtln_warp_factor, InverseMelScale(mel_freq)));
 }
 
 
@@ -250,6 +262,52 @@ void MelBanks::Compute(const VectorBase<BaseFloat> &power_spectrum,
   }
 }
 
+void MelBanks::SetConfigs(const MelBanksOptions &opts,
+                          const FrameExtractionOptions &frame_opts,
+                          BaseFloat vtln_warp_factor) {
+  BaseFloat sample_freq = frame_opts.samp_freq,
+      nyquist = 0.5 * sample_freq;
+  int32 window_length_padded = frame_opts.PaddedWindowSize();
+  KALDI_ASSERT(window_length_padded % 2 == 0);
+  num_fft_bins_ = window_length_padded / 2;
+  // fft-bin width [think of it as Nyquist-freq / half-window-length]
+  fft_bin_width_ = sample_freq / window_length_padded;
+
+  debug_ = opts.debug_mel;
+
+
+  low_freq_ = opts.low_freq;
+  if (opts.high_freq > 0.0)
+    high_freq_ = opts.high_freq;
+  else
+    high_freq_ = nyquist + opts.high_freq;
+
+  if (low_freq_ < 0.0 || low_freq_ >= nyquist
+      || high_freq_ <= 0.0 || high_freq_ > nyquist
+      || high_freq_ <= low_freq_)
+    KALDI_ERR << "Bad values in options: low-freq " << low_freq_
+              << " and high-freq " << high_freq_ << " vs. nyquist "
+              << nyquist;
+
+  breakpoint_ = (opts.modified ? 500.0 : 700.0);
+  vtln_low_ = opts.vtln_low;
+  if (opts.vtln_high > 0.0)
+    vtln_high_ = opts.vtln_high;
+  else
+    vtln_high_ = opts.vtln_high + nyquist;
+
+  if (vtln_warp_factor != 1.0 &&
+      (vtln_low_ < 0.0 || vtln_low_ <= low_freq_
+       || vtln_low_ >= high_freq_
+       || vtln_high_ <= 0.0 || vtln_high_ >= high_freq_
+       || vtln_high_ <= vtln_low_))
+    KALDI_ERR << "Bad values in options: vtln-low " << vtln_low_
+              << " and vtln-high " << vtln_high_ << ", versus "
+              << "low-freq " << low_freq_ << " and high-freq "
+              << high_freq_;
+}
+
+
 void ComputeLifterCoeffs(BaseFloat Q, VectorBase<BaseFloat> *coeffs) {
   // Compute liftering coefficients (scaling on cepstral coeffs)
   // coeffs are numbered slightly differently from HTK: the zeroth
diff --git a/src/feat/mel-computations.h b/src/feat/mel-computations.h
index 5df36c8cb90..12c4d056608 100644
--- a/src/feat/mel-computations.h
+++ b/src/feat/mel-computations.h
@@ -48,6 +48,8 @@ struct MelBanksOptions {
   BaseFloat vtln_low;  // vtln lower cutoff of warping function.
   BaseFloat vtln_high;  // vtln upper cutoff of warping function: if negative, added
                         // to the Nyquist frequency to get the cutoff.
+  bool modified;       // If true, use 'modified' MFCC, which uses a breakpoint of
+                       // 900 instead of 700.
   bool debug_mel;
   // htk_mode is a "hidden" config, it does not show up on command line.
   // Enables more exact compatibibility with HTK, for testing purposes.  Affects
@@ -55,7 +57,7 @@ struct MelBanksOptions {
   bool htk_mode;
   explicit MelBanksOptions(int num_bins = 25)
       : num_bins(num_bins), low_freq(20), high_freq(0), vtln_low(100),
-        vtln_high(-500), debug_mel(false), htk_mode(false) {}
+        vtln_high(-500), modified(false), debug_mel(false), htk_mode(false) {}
 
   void Register(OptionsItf *opts) {
     opts->Register("num-mel-bins", &num_bins,
@@ -69,6 +71,8 @@ struct MelBanksOptions {
     opts->Register("vtln-high", &vtln_high,
                    "High inflection point in piecewise linear VTLN warping function"
                    " (if negative, offset from high-mel-freq");
+    opts->Register("modified", &modified,
+                   "Modified MFCCs, based on paper XXXX.  TODO: document this.");
     opts->Register("debug-mel", &debug_mel,
                    "Print out debugging information for mel bin computation");
   }
@@ -78,30 +82,6 @@ struct MelBanksOptions {
 class MelBanks {
  public:
 
-  static inline BaseFloat InverseMelScale(BaseFloat mel_freq) {
-    return 700.0f * (expf (mel_freq / 1127.0f) - 1.0f);
-  }
-
-  static inline BaseFloat MelScale(BaseFloat freq) {
-    return 1127.0f * logf (1.0f + freq / 700.0f);
-  }
-
-  static BaseFloat VtlnWarpFreq(BaseFloat vtln_low_cutoff,
-                                BaseFloat vtln_high_cutoff,  // discontinuities in warp func
-                                BaseFloat low_freq,
-                                BaseFloat high_freq,  // upper+lower frequency cutoffs in
-                                // the mel computation
-                                BaseFloat vtln_warp_factor,
-                                BaseFloat freq);
-
-  static BaseFloat VtlnWarpMelFreq(BaseFloat vtln_low_cutoff,
-                                   BaseFloat vtln_high_cutoff,
-                                   BaseFloat low_freq,
-                                   BaseFloat high_freq,
-                                   BaseFloat vtln_warp_factor,
-                                   BaseFloat mel_freq);
-
-
   MelBanks(const MelBanksOptions &opts,
            const FrameExtractionOptions &frame_opts,
            BaseFloat vtln_warp_factor);
@@ -116,18 +96,74 @@ class MelBanks {
   // returns vector of central freq of each bin; needed by plp code.
   const Vector<BaseFloat> &GetCenterFreqs() const { return center_freqs_; }
 
-  // Copy constructor
-  MelBanks(const MelBanks &other);
+  // Use the default copy constructor
  private:
+
+  // This function checks that the provided options make sense, and also sets
+  // configuration variables like breakpoint_ in this class.
+  void SetConfigs(const MelBanksOptions &opts,
+                  const FrameExtractionOptions &frame_opts,
+                  BaseFloat vtln_warp_factor);
+
+  // We use simplified formulas for the mel and inverse mel scale, since for
+  // this application, the multiplicative factor doesn't matter.  Note:
+  // breakpoint_ is 700 for normal mel, or 900 for modified.
+  inline BaseFloat InverseMelScale(BaseFloat mel_freq) {
+    return 3500.0 * (expf((mel_freq - breakpoint_) / 3500.0) - 1.0);
+  }
+
+  inline BaseFloat MelScale(BaseFloat freq) {
+    return log (breakpoint_ + 3500.0 * log (1.0 + freq / 3500.0));
+  }
+
+  BaseFloat VtlnWarpFreq(BaseFloat vtln_warp_factor, BaseFloat freq);
+
+
+  BaseFloat VtlnWarpMelFreq(BaseFloat vtln_warp_factor, BaseFloat mel_freq);
+
+  // This sets up the 'bins_' member, for the regular (not modified)
+  // computation.  It assumes center_freqs_ is already set up.
+  // 'htk_mode' is expected to be a copy of opts.htk_mode as given to the
+  // constructor.
+  void ComputeBins(bool htk_mode);
+
+  // This sets up the 'bins_' member, for the modified computaion
+  // with cosine-shaped bins that are more tightly
+  // computation.  It assumes center_freqs_ is already set up.
+  // 'htk_mode' is expected to be a copy of opts.htk_mode as given to the
+  // constructor.
+  void ComputeModifiedBins();
+
   // Disallow assignment
   MelBanks &operator = (const MelBanks &other);
 
-  // center frequencies of bins, numbered from 0 ... num_bins-1.
-  // Needed by GetCenterFreqs().
+
+  // The following few variables are derived from the configuration
+  // options passed in; they are used in converting to and from Mel frequencies,
+  // and for other purposes.
+  BaseFloat breakpoint_;  // The breakpoint in the mel scale: 700 normally;
+                          // 900 if opts.modified is true.
+  BaseFloat low_freq_;  // opts.low_freq
+  BaseFloat high_freq_;  // The same as opts.high_freq if it's >= 0, or
+                         // otherwise the Nyquist plus opts.high_freq.
+  BaseFloat vtln_low_;  // opts.vtln_low; the lower cutoff for VTLN.
+  BaseFloat vtln_high_;  // opts.vtln_high; the upper cutoff for VTLN.
+
+  int32 num_fft_bins_;  // The number of FFT frequency bins (actually, excluding
+                        // the one at the Nyquist).  Equal to half the padded
+                        // window length.
+  BaseFloat fft_bin_width_;  // The frequency separation between successive
+                             // FFT bins: equal nyquist / num_fft_bins_.
+
+
+  // center frequencies of bins (in Hz), numbered from 0 ... num_bins-1.  Needed
+  // by GetCenterFreqs().
   Vector<BaseFloat> center_freqs_;
 
-  // the "bins_" vector is a vector, one for each bin, of a pair:
-  // (the first nonzero fft-bin), (the vector of weights).
+  // the "bins_" vector is a vector, one for each mel bin, of a pair: (the
+  // first nonzero fft-bin), (the vector of weights).  The pair of (int32,
+  // Vector) is provided for efficiency, to avoid having a larger vector with
+  // many zero entries.
   std::vector<std::pair<int32, Vector<BaseFloat> > > bins_;
 
   bool debug_;

From 4eb4862ee321ec04a9f7916a5ad3d04b6d10d652 Mon Sep 17 00:00:00 2001
From: Pegita <pegahgh@gmail.com>
Date: Sun, 16 Dec 2018 15:09:24 -0500
Subject: [PATCH 2/9] pushed to trigger the build (travis issue)

---
 src/feat/mel-computations.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/feat/mel-computations.h b/src/feat/mel-computations.h
index 12c4d056608..0a2e2bc8482 100644
--- a/src/feat/mel-computations.h
+++ b/src/feat/mel-computations.h
@@ -142,7 +142,7 @@ class MelBanks {
   // options passed in; they are used in converting to and from Mel frequencies,
   // and for other purposes.
   BaseFloat breakpoint_;  // The breakpoint in the mel scale: 700 normally;
-                          // 900 if opts.modified is true.
+                          // 500 if opts.modified is true.
   BaseFloat low_freq_;  // opts.low_freq
   BaseFloat high_freq_;  // The same as opts.high_freq if it's >= 0, or
                          // otherwise the Nyquist plus opts.high_freq.

From 126c89a1307a9ff6ca57d776f9c238cc357fe150 Mon Sep 17 00:00:00 2001
From: Pegita <pegahgh@gmail.com>
Date: Fri, 21 Dec 2018 11:41:37 -0500
Subject: [PATCH 3/9] Updated the VTLN unit test for the new VtlnWarpFreq member function.

---
 src/feat/feature-mfcc-test.cc | 41 ++++++++++++-----------------------
 src/feat/mel-computations.cc  |  3 ++-
 src/feat/mel-computations.h   | 34 ++++++++++++++++++++++-------
 3 files changed, 42 insertions(+), 36 deletions(-)

diff --git a/src/feat/feature-mfcc-test.cc b/src/feat/feature-mfcc-test.cc
index c4367139707..e81458741ef 100644
--- a/src/feat/feature-mfcc-test.cc
+++ b/src/feat/feature-mfcc-test.cc
@@ -95,8 +95,8 @@ static void UnitTestSimple() {
   op.frame_opts.round_to_power_of_two = true;
   op.mel_opts.low_freq = 0.0;
   op.mel_opts.htk_mode = true;
+  op.mel_opts.modified = (Rand() % 2 == 0 ? true : false);
   op.htk_compat = true;
-
   Mfcc mfcc(op);
   // use default parameters
 
@@ -613,42 +613,29 @@ static void UnitTestHTKCompare6() {
   }
 
   std::cout << "Test passed :)\n\n";
-  
+
   unlink("tmp.test.wav.fea_kaldi.6");
 }
 
 void UnitTestVtln() {
   // Test the function VtlnWarpFreq.
-  BaseFloat low_freq = 10, high_freq = 7800,
-      vtln_low_cutoff = 20, vtln_high_cutoff = 7400;
-
+  BaseFloat low_freq = 10, high_freq = 7800;
+  MelBanksOptions mel_opts;
+  mel_opts.low_freq = low_freq, mel_opts.high_freq = high_freq;
+  FrameExtractionOptions frame_opts;
+  MelBanks melfbank(mel_opts, frame_opts, 0.9);
   for (size_t i = 0; i < 100; i++) {
     BaseFloat freq = 5000, warp_factor = 0.9 + RandUniform() * 0.2;
-    AssertEqual(MelBanks::VtlnWarpFreq(vtln_low_cutoff, vtln_high_cutoff,
-                             low_freq, high_freq, warp_factor,
-                             freq),
-                freq / warp_factor);
-
-    AssertEqual(MelBanks::VtlnWarpFreq(vtln_low_cutoff, vtln_high_cutoff,
-                             low_freq, high_freq, warp_factor,
-                             low_freq),
-                low_freq);
-    AssertEqual(MelBanks::VtlnWarpFreq(vtln_low_cutoff, vtln_high_cutoff,
-                             low_freq, high_freq, warp_factor,
-                             high_freq),
-                high_freq);
+    AssertEqual(melfbank.VtlnWarpFreq(warp_factor, freq), freq / warp_factor);
+
+    AssertEqual(melfbank.VtlnWarpFreq(warp_factor, low_freq), low_freq);
+    AssertEqual(melfbank.VtlnWarpFreq(warp_factor, high_freq), high_freq);
     BaseFloat freq2 = low_freq + (high_freq-low_freq) * RandUniform(),
         freq3 = freq2 +  (high_freq-freq2) * RandUniform();  // freq3>=freq2
-    BaseFloat w2 = MelBanks::VtlnWarpFreq(vtln_low_cutoff, vtln_high_cutoff,
-                                low_freq, high_freq, warp_factor,
-                                freq2);
-    BaseFloat w3 = MelBanks::VtlnWarpFreq(vtln_low_cutoff, vtln_high_cutoff,
-                                low_freq, high_freq, warp_factor,
-                                freq3);
+    BaseFloat w2 = melfbank.VtlnWarpFreq(warp_factor, freq2);
+    BaseFloat w3 = melfbank.VtlnWarpFreq(warp_factor, freq3);
     KALDI_ASSERT(w3 >= w2);  // increasing function.
-    BaseFloat w3dash = MelBanks::VtlnWarpFreq(vtln_low_cutoff, vtln_high_cutoff,
-                                    low_freq, high_freq, 1.0,
-                                    freq3);
+    BaseFloat w3dash = melfbank.VtlnWarpFreq(1.0, freq3);
     AssertEqual(w3dash, freq3);
   }
 }
diff --git a/src/feat/mel-computations.cc b/src/feat/mel-computations.cc
index bd40a527b29..e0b8d8ca268 100644
--- a/src/feat/mel-computations.cc
+++ b/src/feat/mel-computations.cc
@@ -92,7 +92,7 @@ void MelBanks::ComputeBins(bool htk_mode) {
         if (mel <= center_mel)
           weight = (mel - left_mel) / (center_mel - left_mel);
         else
-         weight = (right_mel-mel) / (right_mel - center_mel);
+         weight = (right_mel - mel) / (right_mel - center_mel);
         this_bin(i) = weight;
         if (first_index == -1)
           first_index = i;
@@ -290,6 +290,7 @@ void MelBanks::SetConfigs(const MelBanksOptions &opts,
               << nyquist;
 
   breakpoint_ = (opts.modified ? 500.0 : 700.0);
+  sec_breakpoint_ = (opts.modified ? 3500 : -1);
   vtln_low_ = opts.vtln_low;
   if (opts.vtln_high > 0.0)
     vtln_high_ = opts.vtln_high;
diff --git a/src/feat/mel-computations.h b/src/feat/mel-computations.h
index 0a2e2bc8482..b1aa5f591f5 100644
--- a/src/feat/mel-computations.h
+++ b/src/feat/mel-computations.h
@@ -72,7 +72,12 @@ struct MelBanksOptions {
                    "High inflection point in piecewise linear VTLN warping function"
                    " (if negative, offset from high-mel-freq");
     opts->Register("modified", &modified,
-                   "Modified MFCCs, based on paper XXXX.  TODO: document this.");
+                   "Modified MFCCs, based on paper 'An alternative to MFCCs for ASR' "
+                   "(in progess for publication). This uses a cosine-type "
+                   "filters with a modified mel scale for ceneter frequency "
+                   "with more resolution around 1st and 2nd formant frequencies."
+                   "The new bandwidth is computed as a combination of linear bandwidth "
+                   "and the bandwidth computed based on filter overlap.");
     opts->Register("debug-mel", &debug_mel,
                    "Print out debugging information for mel bin computation");
   }
@@ -96,6 +101,11 @@ class MelBanks {
   // returns vector of central freq of each bin; needed by plp code.
   const Vector<BaseFloat> &GetCenterFreqs() const { return center_freqs_; }
 
+  BaseFloat VtlnWarpFreq(BaseFloat vtln_warp_factor, BaseFloat freq);
+
+
+  BaseFloat VtlnWarpMelFreq(BaseFloat vtln_warp_factor, BaseFloat mel_freq);
+
   // Use the default copy constructor
  private:
 
@@ -109,18 +119,19 @@ class MelBanks {
   // this application, the multiplicative factor doesn't matter.  Note:
   // breakpoint_ is 700 for normal mel, or 900 for modified.
   inline BaseFloat InverseMelScale(BaseFloat mel_freq) {
-    return 3500.0 * (expf((mel_freq - breakpoint_) / 3500.0) - 1.0);
+    if (sec_breakpoint_ > 0.0)
+      return 3500.0 * (expf((expf(mel_freq) - breakpoint_) / 3500.0) - 1.0);
+    else
+      return breakpoint_ * (expf(mel_freq) - 1.0);
   }
 
   inline BaseFloat MelScale(BaseFloat freq) {
-    return log (breakpoint_ + 3500.0 * log (1.0 + freq / 3500.0));
+    if (sec_breakpoint_ > 0.0)
+      return log (breakpoint_ + 3500.0 * log (1.0 + freq / 3500.0));
+    else
+      return log(1.0 + freq / breakpoint_);
   }
 
-  BaseFloat VtlnWarpFreq(BaseFloat vtln_warp_factor, BaseFloat freq);
-
-
-  BaseFloat VtlnWarpMelFreq(BaseFloat vtln_warp_factor, BaseFloat mel_freq);
-
   // This sets up the 'bins_' member, for the regular (not modified)
   // computation.  It assumes center_freqs_ is already set up.
   // 'htk_mode' is expected to be a copy of opts.htk_mode as given to the
@@ -143,6 +154,13 @@ class MelBanks {
   // and for other purposes.
   BaseFloat breakpoint_;  // The breakpoint in the mel scale: 700 normally;
                           // 500 if opts.modified is true.
+  BaseFloat sec_breakpoint_; // The second breakpoint used in the modified
+                             // mel scale;
+                             // The range is [1500,3500]Hz and it corresponds to
+                             // second breakpoint in the mel scale mainly and
+                             // results in higher center frequency concentration
+                             // around this frequency
+                             // (e.g. avg. freq for second formant)
   BaseFloat low_freq_;  // opts.low_freq
   BaseFloat high_freq_;  // The same as opts.high_freq if it's >= 0, or
                          // otherwise the Nyquist plus opts.high_freq.

From e272089fd6c3f74767b646e82c9bef57d22665c9 Mon Sep 17 00:00:00 2001
From: Pegita <pegahgh@gmail.com>
Date: Sat, 23 Mar 2019 20:16:48 -0400
Subject: [PATCH 4/9] fixed typos; retuned modified-bin diameter constants and
 renamed sec_breakpoint_ to second_breakpoint_.

---
 src/feat/mel-computations.cc | 16 ++++++++--------
 src/feat/mel-computations.h  | 21 +++++++++++----------
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/src/feat/mel-computations.cc b/src/feat/mel-computations.cc
index e0b8d8ca268..228c54672e9 100644
--- a/src/feat/mel-computations.cc
+++ b/src/feat/mel-computations.cc
@@ -121,11 +121,11 @@ void MelBanks::ComputeBins(bool htk_mode) {
   first and last nonzero value (pi for the canonical function).  If there are
   a lot of bins, their diamter is defined by a formula and it's a function of
   the center frequency f of the bin:
-     diameter = 30 + 60 f / (f + 500).
-  so it increases from 30Hz to 90Hz with a knee around 500Hz.
+     diameter = alpha1 + alpha2 * f / (f + breakpoint_).
+  So, it increases from alpha1 Hz to (alpha1 + alpha2) Hz with a knee around breakpoint_ (Hz).
   However (and this matters if the number of bins is relatively small), we never
   let the diameter fall below the point where the crossing-point of this and
-  the next bin would be less than 0.1.  By this I mean is the y-value where the
+  the next bin would be less than 0.2.  By this I mean is the y-value where the
   raised-cosines cross.  This value ensures that there won't be too a 'dip'
   in the middle of the two bins.
  */
@@ -136,11 +136,11 @@ void MelBanks::ComputeModifiedBins() {
         next_center = (bin == num_bins - 1 ?
                        high_freq_ : center_freqs_(bin + 1));
 
-    // note: breakpoint_ is 500 (Hz).
-    BaseFloat diameter_floor = (next_center - center_freq) * 1.1,
-        diameter = 30.0 + 60.0 * (center_freq / (center_freq + breakpoint_));
+    // note: breakpoint_ is 900 (Hz).
+    BaseFloat diameter_floor = (next_center - center_freq) * 1.2,
+        diameter = 80.0 + 100.0 * (center_freq / (center_freq + breakpoint_));
 
-    diameter = pow(diameter * diameter + diameter_floor * diameter_floor, 0.5);
+    diameter = sqrt(diameter * diameter + diameter_floor * diameter_floor);
 
     // 'freq_scale' is the scaling factor on the frequencies that will ensure
     // that the diameter becomes equal to pi, like the canonical bin function
@@ -290,7 +290,7 @@ void MelBanks::SetConfigs(const MelBanksOptions &opts,
               << nyquist;
 
   breakpoint_ = (opts.modified ? 500.0 : 700.0);
-  sec_breakpoint_ = (opts.modified ? 3500 : -1);
+  second_breakpoint_ = (opts.modified ? 3500 : -1);
   vtln_low_ = opts.vtln_low;
   if (opts.vtln_high > 0.0)
     vtln_high_ = opts.vtln_high;
diff --git a/src/feat/mel-computations.h b/src/feat/mel-computations.h
index b1aa5f591f5..2603a321ce4 100644
--- a/src/feat/mel-computations.h
+++ b/src/feat/mel-computations.h
@@ -48,8 +48,7 @@ struct MelBanksOptions {
   BaseFloat vtln_low;  // vtln lower cutoff of warping function.
   BaseFloat vtln_high;  // vtln upper cutoff of warping function: if negative, added
                         // to the Nyquist frequency to get the cutoff.
-  bool modified;       // If true, use 'modified' MFCC, which uses a breakpoint of
-                       // 900 instead of 700.
+  bool modified;       // If true, use 'modified' MFCC.
   bool debug_mel;
   // htk_mode is a "hidden" config, it does not show up on command line.
   // Enables more exact compatibibility with HTK, for testing purposes.  Affects
@@ -74,10 +73,12 @@ struct MelBanksOptions {
     opts->Register("modified", &modified,
                    "Modified MFCCs, based on paper 'An alternative to MFCCs for ASR' "
                    "(in progess for publication). This uses a cosine-type "
-                   "filters with a modified mel scale for ceneter frequency "
-                   "with more resolution around 1st and 2nd formant frequencies."
-                   "The new bandwidth is computed as a combination of linear bandwidth "
-                   "and the bandwidth computed based on filter overlap.");
+                   "filters with a modified mel scale with two breakpoints "
+                   ", which control filters's resolution in the frequency region."
+                   "Also, a filter bandwidth is computed as a norm of two "
+                   "bandwidths 1) a bandwidth value computed using linear "
+                   "equation, and 2) a bandwidth value computed based on filter "
+                   "overlap.");
     opts->Register("debug-mel", &debug_mel,
                    "Print out debugging information for mel bin computation");
   }
@@ -119,14 +120,14 @@ class MelBanks {
   // this application, the multiplicative factor doesn't matter.  Note:
   // breakpoint_ is 700 for normal mel, or 900 for modified.
   inline BaseFloat InverseMelScale(BaseFloat mel_freq) {
-    if (sec_breakpoint_ > 0.0)
-      return 3500.0 * (expf((expf(mel_freq) - breakpoint_) / 3500.0) - 1.0);
+    if (second_breakpoint_ > 0.0)
+      return second_breakpoint_ * (expf((expf(mel_freq) - breakpoint_) / second_breakpoint_) - 1.0);
     else
       return breakpoint_ * (expf(mel_freq) - 1.0);
   }
 
   inline BaseFloat MelScale(BaseFloat freq) {
-    if (sec_breakpoint_ > 0.0)
+    if (second_breakpoint_ > 0.0)
       return log (breakpoint_ + 3500.0 * log (1.0 + freq / 3500.0));
     else
       return log(1.0 + freq / breakpoint_);
@@ -154,7 +155,7 @@ class MelBanks {
   // and for other purposes.
   BaseFloat breakpoint_;  // The breakpoint in the mel scale: 700 normally;
                           // 500 if opts.modified is true.
-  BaseFloat sec_breakpoint_; // The second breakpoint used in the modified
+  BaseFloat second_breakpoint_; // The second breakpoint used in the modified
                              // mel scale;
                              // The range is [1500,3500]Hz and it corresponds to
                              // second breakpoint in the mel scale mainly and

From 0f01df645254ee5f4613b35c6fc107de16d284ef Mon Sep 17 00:00:00 2001
From: Daniel Povey <dpovey@gmail.com>
Date: Sun, 24 Mar 2019 16:41:09 -0400
Subject: [PATCH 5/9] [src] Clean up modified mel code

---
 src/feat/mel-computations.cc | 52 +++++++++++++++++++++++++-----------
 src/feat/mel-computations.h  | 21 ++++++++-------
 2 files changed, 48 insertions(+), 25 deletions(-)

diff --git a/src/feat/mel-computations.cc b/src/feat/mel-computations.cc
index 228c54672e9..b3c69db7805 100644
--- a/src/feat/mel-computations.cc
+++ b/src/feat/mel-computations.cc
@@ -118,16 +118,33 @@ void MelBanks::ComputeBins(bool htk_mode) {
 
   They are shaped like a cosine function from -pi/2 to pi/2 (unlike the standard
   triangular bins).  We define their diameter as the distance between the
-  first and last nonzero value (pi for the canonical function).  If there are
-  a lot of bins, their diamter is defined by a formula and it's a function of
-  the center frequency f of the bin:
-     diameter = alpha1 + alpha2 * f / (f + breakpoint_).
-  So, it increases from alpha1 Hz to (alpha1 + alpha2) Hz with a knee around breakpoint_ (Hz).
-  However (and this matters if the number of bins is relatively small), we never
-  let the diameter fall below the point where the crossing-point of this and
-  the next bin would be less than 0.2.  By this I mean is the y-value where the
-  raised-cosines cross.  This value ensures that there won't be too a 'dip'
-  in the middle of the two bins.
+  first and last nonzero value (pi for the canonical function).  We choose
+  the diameter as:
+       d = sqrt(d1^2 + d2^2)
+  (this function may be viewed as a kind of soft-max), where d1 and d2 are
+  two different formulas for the diameter that we describe below.
+
+    d1 is a formula that ensures the bins overlap by at least a minimal amount.
+
+   Let bin_diff be the difference in Hz between this bin's center-frequency
+   and the next bin's center-frequency, or (if this is the last bin),
+   the user-specified `high-freq` which is the top of the range of frequencies
+   we cover.  Then:
+
+       d1 = 1.1 * bin_diff
+
+   The formula for d2 is designed to provide a reasonable floor so the bandwidths
+   don't get ridiculously narrow as we add more bins, and to approximate what we
+   observed the filter diameters to look like when learning filterbanks via DNNs.
+   The formula is:
+
+       d2 = 50 + 50 * f / (f + 700)
+
+   which roughly means: d2 starts at 50Hz for f = 0, reaches 75Hz at f = 700Hz,
+   and gradually approaches 100Hz for much higher center frequencies.  There is
+   no rocket science behind this formula; it was obtained through a combination
+   of trying to match the DNN-learned filterbank bandwidths (cite: Pegah's
+   thesis), and manual tuning.
  */
 void MelBanks::ComputeModifiedBins() {
   int32 num_bins = center_freqs_.Dim();
@@ -136,11 +153,14 @@ void MelBanks::ComputeModifiedBins() {
         next_center = (bin == num_bins - 1 ?
                        high_freq_ : center_freqs_(bin + 1));
 
-    // note: breakpoint_ is 900 (Hz).
-    BaseFloat diameter_floor = (next_center - center_freq) * 1.2,
-        diameter = 80.0 + 100.0 * (center_freq / (center_freq + breakpoint_));
+    BaseFloat d1 = (next_center - center_freq) * 1.1,
+              d2 = 50.0 + 50.0 * (center_freq / (center_freq + 700.0));
 
-    diameter = sqrt(diameter * diameter + diameter_floor * diameter_floor);
+    // 'diameter' is in Hz; it represents the distance on the frequency axis
+    // between the first and last nonzero points of the raised-cosine window
+    // function.  This formula applies our heuristic, described above, to choose
+    // it.
+    BaseFloat diameter = sqrt(d1 * d1 + d2 * d2);
 
     // 'freq_scale' is the scaling factor on the frequencies that will ensure
     // that the diameter becomes equal to pi, like the canonical bin function
@@ -289,8 +309,8 @@ void MelBanks::SetConfigs(const MelBanksOptions &opts,
               << " and high-freq " << high_freq_ << " vs. nyquist "
               << nyquist;
 
-  breakpoint_ = (opts.modified ? 500.0 : 700.0);
-  second_breakpoint_ = (opts.modified ? 3500 : -1);
+  breakpoint_ = (opts.modified ? 300.0 : 700.0);
+  second_breakpoint_ = (opts.modified ? 2000.0 : -1);
   vtln_low_ = opts.vtln_low;
   if (opts.vtln_high > 0.0)
     vtln_high_ = opts.vtln_high;
diff --git a/src/feat/mel-computations.h b/src/feat/mel-computations.h
index b0b96d7af88..f7f163aaaf8 100644
--- a/src/feat/mel-computations.h
+++ b/src/feat/mel-computations.h
@@ -116,21 +116,24 @@ class MelBanks {
                   const FrameExtractionOptions &frame_opts,
                   BaseFloat vtln_warp_factor);
 
-  // We use simplified formulas for the mel and inverse mel scale, since for
-  // this application, the multiplicative factor doesn't matter.  Note:
-  // breakpoint_ is 700 for normal mel, or 900 for modified.
   inline BaseFloat InverseMelScale(BaseFloat mel_freq) {
-    if (second_breakpoint_ > 0.0)
-      return second_breakpoint_ * (expf((expf(mel_freq) - breakpoint_) / second_breakpoint_) - 1.0);
+    BaseFloat b1 = breakpoint_, b2 = second_breakpoint_;
+    if (b2 > 0.0)
+      return b2 * (expf((expf(mel_freq) - b1) / b2) - 1.0);
     else
-      return breakpoint_ * (expf(mel_freq) - 1.0);
+      return b1 * (expf(mel_freq) - 1.0);
   }
 
   inline BaseFloat MelScale(BaseFloat freq) {
-    if (second_breakpoint_ > 0.0)
-      return log (breakpoint_ + 3500.0 * log (1.0 + freq / 3500.0));
-    else
+    BaseFloat b1 = breakpoint_, b2 = second_breakpoint_;
+    if (b2 > 0.0) {
+      // Modified Mel: linear, till ~b1, then log till ~b2, then log(log)
+      return log (b1 + b2 * log(1.0 + freq / b2));
+    } else {
+      // Mel: linear till ~b1 = 700, then logarithmic.  We ignore the scaling
+      // factor as it makes no difference to our application.
       return log(1.0 + freq / breakpoint_);
+    }
   }
 
   // This sets up the 'bins_' member, for the regular (not modified)

From 5e0d7140bba79e801ee3939e51e34f8a3eb03678 Mon Sep 17 00:00:00 2001
From: Daniel Povey <dpovey@gmail.com>
Date: Sun, 24 Mar 2019 16:45:19 -0400
Subject: [PATCH 6/9] [src] Remove debug code

---
 src/feat/mel-computations.cc | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/src/feat/mel-computations.cc b/src/feat/mel-computations.cc
index b3c69db7805..80428ba7d65 100644
--- a/src/feat/mel-computations.cc
+++ b/src/feat/mel-computations.cc
@@ -158,8 +158,8 @@ void MelBanks::ComputeModifiedBins() {
 
     // 'diameter' is in Hz; it represents the distance on the frequency axis
     // between the first and last nonzero points of the raised-cosine window
-    // function.  This formula applies our heuristic, described above, to choose
-    // it.
+    // function.  This formula applies our heuristic, described above,
+    // to choose the diameter.
     BaseFloat diameter = sqrt(d1 * d1 + d2 * d2);
 
     // 'freq_scale' is the scaling factor on the frequencies that will ensure
@@ -273,13 +273,6 @@ void MelBanks::Compute(const VectorBase<BaseFloat> &power_spectrum,
     // it early.
     KALDI_ASSERT(!KALDI_ISNAN((*mel_energies_out)(i)));
   }
-
-  if (debug_) {
-    fprintf(stderr, "MEL BANKS:\n");
-    for (int32 i = 0; i < num_bins; i++)
-      fprintf(stderr, " %f", (*mel_energies_out)(i));
-    fprintf(stderr, "\n");
-  }
 }
 
 void MelBanks::SetConfigs(const MelBanksOptions &opts,

From ef9450f2a33bad33fd1dca3183c2443a7287a6b2 Mon Sep 17 00:00:00 2001
From: Daniel Povey <dpovey@gmail.com>
Date: Wed, 27 Mar 2019 21:41:18 -0400
Subject: [PATCH 7/9] [src] Cleanups to mel-computations.h

---
 src/feat/mel-computations.h | 31 +++++++++++++------------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/src/feat/mel-computations.h b/src/feat/mel-computations.h
index f7f163aaaf8..3231671fa8b 100644
--- a/src/feat/mel-computations.h
+++ b/src/feat/mel-computations.h
@@ -71,14 +71,9 @@ struct MelBanksOptions {
                    "High inflection point in piecewise linear VTLN warping function"
                    " (if negative, offset from high-mel-freq");
     opts->Register("modified", &modified,
-                   "Modified MFCCs, based on paper 'An alternative to MFCCs for ASR' "
-                   "(in progess for publication). This uses a cosine-type "
-                   "filters with a modified mel scale with two breakpoints "
-                   ", which control filters's resolution in the frequency region."
-                   "Also, a filter bandwidth is computed as a norm of two "
-                   "bandwidths 1) a bandwidth value computed using linear "
-                   "equation, and 2) a bandwidth value computed based on filter "
-                   "overlap.");
+                   "If true, use a modified form of the Mel scale that gives "
+                   "more emphasis to lower frequencies, and use differently "
+                   "tuned bin shapes and widths than normal.");
     opts->Register("debug-mel", &debug_mel,
                    "Print out debugging information for mel bin computation");
   }
@@ -118,7 +113,7 @@ class MelBanks {
 
   inline BaseFloat InverseMelScale(BaseFloat mel_freq) {
     BaseFloat b1 = breakpoint_, b2 = second_breakpoint_;
-    if (b2 > 0.0)
+    if (b2 > 0.0)  // modified Mel scale
       return b2 * (expf((expf(mel_freq) - b1) / b2) - 1.0);
     else
       return b1 * (expf(mel_freq) - 1.0);
@@ -132,7 +127,7 @@ class MelBanks {
     } else {
       // Mel: linear till ~b1 = 700, then logarithmic.  We ignore the scaling
       // factor as it makes no difference to our application.
-      return log(1.0 + freq / breakpoint_);
+      return log(1.0 + freq / b1);
     }
   }
 
@@ -153,18 +148,18 @@ class MelBanks {
   MelBanks &operator = (const MelBanks &other);
 
 
+
   // The following few variables are derived from the configuration
   // options passed in; they are used in converting to and from Mel frequencies,
   // and for other purposes.
-  BaseFloat breakpoint_;  // The breakpoint in the mel scale: 700 normally;
-                          // 500 if opts.modified is true.
+  BaseFloat breakpoint_;        // The breakpoint of the Mel scale (700) if we
+                                // are using the normal mel scale; otherwise
+                                // (--modified=true) the first breakpoint of
+                                // the modified-mel scale, e.g. 300.
   BaseFloat second_breakpoint_; // The second breakpoint used in the modified
-                             // mel scale;
-                             // The range is [1500,3500]Hz and it corresponds to
-                             // second breakpoint in the mel scale mainly and
-                             // results in higher center frequency concentration
-                             // around this frequency
-                             // (e.g. avg. freq for second formant)
+                                // mel scale, e.g. 2000.
+                                // Only relevant if --modified=true
+
   BaseFloat low_freq_;  // opts.low_freq
   BaseFloat high_freq_;  // The same as opts.high_freq if it's >= 0, or
                          // otherwise the Nyquist plus opts.high_freq.

From d5ae5826577976251b22fb07695040810486532b Mon Sep 17 00:00:00 2001
From: Daniel Povey <dpovey@gmail.com>
Date: Mon, 22 Apr 2019 22:44:23 -0400
Subject: [PATCH 8/9] [src] Configuration change in modified mel

---
 src/feat/mel-computations.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/feat/mel-computations.cc b/src/feat/mel-computations.cc
index 80428ba7d65..838caf94ffa 100644
--- a/src/feat/mel-computations.cc
+++ b/src/feat/mel-computations.cc
@@ -154,7 +154,7 @@ void MelBanks::ComputeModifiedBins() {
                        high_freq_ : center_freqs_(bin + 1));
 
     BaseFloat d1 = (next_center - center_freq) * 1.1,
-              d2 = 50.0 + 50.0 * (center_freq / (center_freq + 700.0));
+              d2 = 60.0 + 50.0 * (center_freq / (center_freq + 700.0));
 
     // 'diameter' is in Hz; it represents the distance on the frequency axis
     // between the first and last nonzero points of the raised-cosine window

From 390ef59b1f6971f42d790a6fd0eb75d702d30e84 Mon Sep 17 00:00:00 2001
From: Daniel Povey <dpovey@gmail.com>
Date: Sun, 28 Apr 2019 14:15:16 -0400
Subject: [PATCH 9/9] [src] Fix to breakpoint in bandwidth computation

---
 src/feat/mel-computations.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/feat/mel-computations.cc b/src/feat/mel-computations.cc
index 838caf94ffa..bf1563434eb 100644
--- a/src/feat/mel-computations.cc
+++ b/src/feat/mel-computations.cc
@@ -154,7 +154,7 @@ void MelBanks::ComputeModifiedBins() {
                        high_freq_ : center_freqs_(bin + 1));
 
     BaseFloat d1 = (next_center - center_freq) * 1.1,
-              d2 = 60.0 + 50.0 * (center_freq / (center_freq + 700.0));
+        d2 = 60.0 + 50.0 * (center_freq / (center_freq + breakpoint_));
 
     // 'diameter' is in Hz; it represents the distance on the frequency axis
     // between the first and last nonzero points of the raised-cosine window