Commit de715b3

Add script for deep speech benchmark (#4750)
* Add script for benchmark
* Add random seed
1 parent 3158727 commit de715b3

3 files changed: +68 -5 lines changed

research/deep_speech/README.md (+10 -2)

@@ -36,7 +36,9 @@ or
 pip install -r requirements.txt
 ```

-### Download and preprocess dataset
+### Run each step individually
+
+#### Download and preprocess dataset
 To download the dataset, issue the following command:
 ```
 python data/download.py
@@ -46,7 +48,7 @@ Arguments:

 Use the `--help` or `-h` flag to get a full list of possible arguments.

-### Train and evaluate model
+#### Train and evaluate model
 To train and evaluate the model, issue the following command:
 ```
 python deep_speech.py
@@ -59,3 +61,9 @@ Arguments:

 There are other arguments about DeepSpeech2 model and training/evaluation process. Use the `--help` or `-h` flag to get a full list of possible arguments with detailed descriptions.

+### Run the benchmark
+A shell script [run_deep_speech.sh](run_deep_speech.sh) is provided to run the whole pipeline with default parameters. Issue the following command to run the benchmark:
+```
+sh run_deep_speech.sh
+```
+Note that by default the training dataset in the benchmark includes train-clean-100, train-clean-360 and train-other-500, and the evaluation dataset includes dev-clean and dev-other.

research/deep_speech/deep_speech.py (+8 -3)

@@ -212,6 +212,7 @@ def generate_dataset(data_dir):

 def run_deep_speech(_):
   """Run deep speech training and eval loop."""
+  tf.set_random_seed(flags_obj.seed)
   # Data preprocessing
   tf.logging.info("Data preprocessing...")
   train_speech_dataset = generate_dataset(flags_obj.train_data_dir)
@@ -319,19 +320,23 @@ def define_deep_speech_flags():
   flags_core.set_defaults(
       model_dir="/tmp/deep_speech_model/",
       export_dir="/tmp/deep_speech_saved_model/",
-      train_epochs=200,
+      train_epochs=10,
       batch_size=128,
       hooks="")

   # Deep speech flags
+  flags.DEFINE_integer(
+      name="seed", default=1,
+      help=flags_core.help_wrap("The random seed."))
+
   flags.DEFINE_string(
       name="train_data_dir",
-      default="/tmp/librispeech_data/train-clean/LibriSpeech/train-clean.csv",
+      default="/tmp/librispeech_data/test-clean/LibriSpeech/test-clean.csv",
       help=flags_core.help_wrap("The csv file path of train dataset."))

   flags.DEFINE_string(
       name="eval_data_dir",
-      default="/tmp/librispeech_data/dev-clean/LibriSpeech/dev-clean.csv",
+      default="/tmp/librispeech_data/test-clean/LibriSpeech/test-clean.csv",
       help=flags_core.help_wrap("The csv file path of evaluation dataset."))

   flags.DEFINE_bool(
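
The new `--seed` flag is passed to `tf.set_random_seed`, which fixes the graph-level seed so that op-level random draws are reproducible from run to run. A minimal sketch of that behavior under the TF 1.x API used here (the op and shape are illustrative):

```python
import tensorflow as tf  # TF 1.x, as in deep_speech.py

tf.set_random_seed(1)  # graph-level seed; matches the new flag's default

# Op seeds are derived deterministically from the graph-level seed at op
# creation time, so this op produces the same draws on every fresh run.
x = tf.random_uniform([3])

with tf.Session() as sess:
  print(sess.run(x))  # identical output across separate runs of this script
```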

research/deep_speech/run_deep_speech.sh (new file, +50)

@@ -0,0 +1,50 @@
+#!/bin/bash
+# Script to run the deep speech model to achieve the MLPerf target (WER = 0.23).
+# Step 1: download the LibriSpeech dataset.
+echo "Data downloading..."
+python data/download.py
+
+## After data downloading, the dataset directories are:
+train_clean_100="/tmp/librispeech_data/train-clean-100/LibriSpeech/train-clean-100.csv"
+train_clean_360="/tmp/librispeech_data/train-clean-360/LibriSpeech/train-clean-360.csv"
+train_other_500="/tmp/librispeech_data/train-other-500/LibriSpeech/train-other-500.csv"
+dev_clean="/tmp/librispeech_data/dev-clean/LibriSpeech/dev-clean.csv"
+dev_other="/tmp/librispeech_data/dev-other/LibriSpeech/dev-other.csv"
+test_clean="/tmp/librispeech_data/test-clean/LibriSpeech/test-clean.csv"
+test_other="/tmp/librispeech_data/test-other/LibriSpeech/test-other.csv"
+
+# Step 2: generate the train dataset and the evaluation dataset.
+echo "Data preprocessing..."
+train_file="/tmp/librispeech_data/train_dataset.csv"
+eval_file="/tmp/librispeech_data/eval_dataset.csv"
+
+head -1 $train_clean_100 > $train_file
+for filename in $train_clean_100 $train_clean_360 $train_other_500
+do
+  sed 1d $filename >> $train_file
+done
+
+head -1 $dev_clean > $eval_file
+for filename in $dev_clean $dev_other
+do
+  sed 1d $filename >> $eval_file
+done
+
+# Step 3: filter out the audio files that exceed the max time duration.
+final_train_file="/tmp/librispeech_data/final_train_dataset.csv"
+final_eval_file="/tmp/librispeech_data/final_eval_dataset.csv"
+
+MAX_AUDIO_LEN=27.0
+awk -v maxlen="$MAX_AUDIO_LEN" 'BEGIN{FS="\t";} NR==1{print $0} NR>1{cmd="soxi -D "$1""; cmd|getline x; if(x<=maxlen) {print $0}; close(cmd);}' $train_file > $final_train_file
+awk -v maxlen="$MAX_AUDIO_LEN" 'BEGIN{FS="\t";} NR==1{print $0} NR>1{cmd="soxi -D "$1""; cmd|getline x; if(x<=maxlen) {print $0}; close(cmd);}' $eval_file > $final_eval_file
+
+# Step 4: run the training and evaluation loop in the background, and save the running info to a log file.
+echo "Model training and evaluation..."
+start=`date +%s`
+
+log_file=log_`date +%Y-%m-%d`
+nohup python deep_speech.py --train_data_dir=$final_train_file --eval_data_dir=$final_eval_file --num_gpus=-1 --wer_threshold=0.23 --seed=1 >$log_file 2>&1 &
+
+wait  # block until the background job finishes, so the elapsed time below is meaningful
+end=`date +%s`
+runtime=$((end-start))
+echo "Model training time is" $runtime "seconds."
