Change G2PWModel download: replace the wget/tar shell commands with download_and_decompress into MODEL_HOME

BarryKCL · BarryKCL · commit de0f99150a65 · 2022-08-08T21:29:10.000+08:00
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,3 +1,2 @@
 include paddlespeech/t2s/exps/*.txt
-include paddlespeech/t2s/frontend/*.yaml
-include paddlespeech/t2s/frontend/g2pw/*.json
+include paddlespeech/t2s/frontend/*.yaml
diff --git a/paddlespeech/resource/pretrained_models.py b/paddlespeech/resource/pretrained_models.py
@@ -655,24 +655,6 @@
             'phone_id_map.txt',
         },
     },
-    "fastspeech2_mix-mix": {
-        '1.0': {
-            'url':
-            'https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_csmscljspeech_add-zhen.zip',
-            'md5':
-            '77d9d4b5a79ed6203339ead7ef6c74f9',
-            'config':
-            'default.yaml',
-            'ckpt':
-            'snapshot_iter_94000.pdz',
-            'speech_stats':
-            'speech_stats.npy',
-            'phones_dict':
-            'phone_id_map.txt',
-            'speaker_dict':
-            'speaker_id_map.txt',
-        },
-    },
     # tacotron2
     "tacotron2_csmsc-zh": {
         '1.0': {
@@ -1095,8 +1077,7 @@
             'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/speedyspeech/speedyspeech_csmsc_onnx_0.2.0.zip',
             'md5':
             '3e9c45af9ef70675fc1968ed5074fc88',
-            'ckpt':
-            'speedyspeech_csmsc.onnx',
+            'ckpt': ['speedyspeech_csmsc.onnx'],
             'phones_dict':
             'phone_id_map.txt',
             'tones_dict':
@@ -1112,8 +1093,7 @@
             'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_csmsc_onnx_0.2.0.zip',
             'md5':
             'fd3ad38d83273ad51f0ea4f4abf3ab4e',
-            'ckpt':
-            'fastspeech2_csmsc.onnx',
+            'ckpt': ['fastspeech2_csmsc.onnx'],
             'phones_dict':
             'phone_id_map.txt',
             'sample_rate':
@@ -1126,8 +1106,7 @@
             'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_ljspeech_onnx_1.1.0.zip',
             'md5':
             '00754307636a48c972a5f3e65cda3d18',
-            'ckpt':
-            'fastspeech2_ljspeech.onnx',
+            'ckpt': ['fastspeech2_ljspeech.onnx'],
             'phones_dict':
             'phone_id_map.txt',
             'sample_rate':
@@ -1140,8 +1119,7 @@
             'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_onnx_1.1.0.zip',
             'md5':
             'a1d6ee21de897ce394f5469e2bb4df0d',
-            'ckpt':
-            'fastspeech2_aishell3.onnx',
+            'ckpt': ['fastspeech2_aishell3.onnx'],
             'phones_dict':
             'phone_id_map.txt',
             'speaker_dict':
@@ -1153,11 +1131,10 @@
     "fastspeech2_vctk_onnx-en": {
         '1.0': {
             'url':
             'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_onnx_1.1.0.zip',
             'md5':
             'd9c3a9b02204a2070504dd99f5f959bf',
-            'ckpt':
-            'fastspeech2_vctk.onnx',
+            'ckpt': ['fastspeech2_vctk.onnx'],
             'phones_dict':
             'phone_id_map.txt',
             'speaker_dict':
@@ -1335,3 +1312,17 @@
         },
     },
 }
+
+# ---------------------------------
+# ------------- G2PW ---------------
+# ---------------------------------
+g2pw_onnx_models = {
+    'G2PWModel': {
+        '1.0': {
+            'url':
+            'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel.tar',
+            'md5':
+            '86a3dd8db0291c575c46e134111dce23',
+        },
+    },
+}
diff --git a/paddlespeech/t2s/frontend/g2pw/onnx_api.py b/paddlespeech/t2s/frontend/g2pw/onnx_api.py
@@ -10,14 +10,14 @@
 from opencc import OpenCC
 
 from paddlenlp.transformers import BertTokenizer
-
+from paddlespeech.utils.env import MODEL_HOME
 from paddlespeech.t2s.frontend.g2pw.dataset import prepare_data,\
                                                    prepare_onnx_input,\
                                                    get_phoneme_labels,\
                                                    get_char_phoneme_labels
 from paddlespeech.t2s.frontend.g2pw.utils import load_config
-
-MODEL_URL = 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel.tar'
+from paddlespeech.cli.utils import download_and_decompress
+from paddlespeech.resource.pretrained_models import g2pw_onnx_models
 
 
 def predict(session, onnx_input, labels):
@@ -40,21 +40,10 @@ def predict(session, onnx_input, labels):
     return all_preds, all_confidences
 
 
-def download_model(model_dir):
-    os.makedirs(model_dir, exist_ok=True)
-    wget_shell = "cd %s  && wget %s"%(model_dir,MODEL_URL)
-    os.system(wget_shell)
-    shell = "cd %s ;tar -xvf %s;cd %s/G2PWModel;rm -rf .*" % (model_dir,MODEL_URL.split("/")[-1], model_dir)
-    os.system(shell)
-    rm_shell = "cd %s && rm -rf %s"%(model_dir,MODEL_URL.split("/")[-1])
-    os.system(rm_shell)
-
-
 class G2PWOnnxConverter:
-    def __init__(self, style='bopomofo', model_source=None, enable_non_tradional_chinese=False):
-        model_dir = os.path.join(os.path.expandvars('$HOME'), 'paddlespeech/models')
+    def __init__(self, model_dir = MODEL_HOME, style='bopomofo', model_source=None, enable_non_tradional_chinese=False):
         if not os.path.exists(os.path.join(model_dir, 'G2PWModel/g2pW.onnx')):
-            download_model(model_dir)
+            uncompress_path = download_and_decompress(g2pw_onnx_models['G2PWModel']['1.0'],model_dir)
 
         sess_options = onnxruntime.SessionOptions()
         sess_options.intra_op_num_threads = 2