
Commit 9a61730

support fairseq export (#278)
* fix torch fake quant positions
* fix torch decoder self attn quant position
* rename scripts
* modify lightseq arguments
* add find-unused-parameters for ls_torch_fairseq training
* finetune quant model from pretrained fp16 model
* fairseq generate using sacrebleu
* support native fairseq export
* polish export code
* support converting pb to hdf5
* support ls_torch_fairseq_quant export (stage 1)
* fix typo
* fix fake quant relu compute bug
* fix export bug
* delete useless proto keys
* add ls_torch_fairseq ptq export, fix encdec_attn kv quant bug
* fix qat export bug
* modify ptq act_clip_max
* support fairseq generate using lightseq inference
* support native fairseq ptq export
* modify README.md
1 parent 6988914 commit 9a61730

32 files changed: +2244 −586 lines

examples/inference/python/README.md

Lines changed: 62 additions & 23 deletions
````diff
@@ -1,71 +1,105 @@
-## Examples of exporting models for LightSeq inference
+# Examples of exporting models for LightSeq inference
 
-### Switch to the current directory
+## Switch to the current directory
 ```shell
 cd examples/inference/python
 ```
 
-### Export models
+## Export models
+### Hugging Face
 1. Hugging Face BART
 
 Export Hugging Face BART models to protobuf/hdf5 format.
 ```shell
-python export/hf_bart_export.py
+python export/huggingface/hf_bart_export.py
 ```
 2. Hugging Face BERT
 
 Export Hugging Face BERT models to hdf5 format.
 ```shell
-python export/hf_bert_export.py
+python export/huggingface/hf_bert_export.py
 ```
 3. Hugging Face GPT2
 
 Export Hugging Face GPT2 models to hdf5 format.
 ```shell
-python export/hf_gpt2_export.py
+python export/huggingface/hf_gpt2_export.py
 ```
-4. Fairseq Transformer using LightSeq training library
+### Native Fairseq
+1. Native Fairseq Transformer
+
+Export native Fairseq Transformer models to protobuf/hdf5 format. Refer to the `examples/training/fairseq` directory for more training details.
+```shell
+python export/fairseq/native_fs_transformer_export.py -m checkpoint_best.pt
+```
+
+2. Native Fairseq Transformer using PTQ
+
+Export native Fairseq Transformer models using PTQ to protobuf/hdf5 format. Refer to the `examples/training/fairseq` directory for more training details.
+```shell
+python export/fairseq/native_fs_transformer_export.py -m checkpoint_best.pt
+```
+
+3. Native Fairseq MoE Transformer
+
+Export Fairseq MoE models to protobuf/hdf5 format.
+```shell
+python export/fairseq/fs_moe_export.py
+```
+
+### Fairseq Transformer + LightSeq
+1. Fairseq Transformer using LightSeq training library
 
 Export Fairseq Transformer models training with LightSeq to protobuf/hdf5 format. Refer to the `examples/training/fairseq` directory for more training details.
 ```shell
-python export/ls_fs_transformer_export.py
+python export/fairseq/ls_fs_transformer_export.py -m checkpoint_best.pt
 ```
-5. Fairseq Transformer using LightSeq training library with int8 quantization
 
-Export Fairseq Transformer models training with LightSeq to protobuf format, and then using int8 quantization to speedup inference. Refer to the `examples/training/fairseq` directory for more training details.
+2. Fairseq Transformer using LightSeq training library with PTQ
+
+Export Fairseq Transformer models training with LightSeq to protobuf format, and then using PTQ to speedup inference. Refer to the `examples/training/fairseq` directory for more training details.
 ```shell
-python export/ls_fs_transformer_ptq_export.py
+python export/fairseq/ls_fs_transformer_ptq_export.py -m checkpoint_best.pt
 ```
-**You can compare the speeds between fp16 and int8 inference using above 4th and 5th examples.**
 
-6. LightSeq Transformer
+### LightSeq Transformer
+
+1. LightSeq Transformer
 
 Export LightSeq Transformer models to protobuf/hdf5 format. Refer to the `examples/training/custom` directory for more training details.
 ```shell
 python export/ls_transformer_export.py
 ```
-7. LightSeq Transformer using int8 quantization
+2. LightSeq Transformer using PTQ
 
-Export LightSeq fp16/fp32 Transformer models to int8 protobuf format, and then using int8 quantization to speedup inference. Refer to the `examples/training/custom` directory for more training details. Note that in this example, we do not need to finetune the models using fake-quantization.
+Export LightSeq fp16/fp32 Transformer models to int8 protobuf format, and then using PTQ to speedup inference. Refer to the `examples/training/custom` directory for more training details. Note that in this example, we do not need to finetune the models using fake-quantization.
 ```shell
 python export/ls_transformer_ptq_export.py
 ```
-**You can compare the speeds between fp16 and int8 inference using above 6th and 7th examples.**
 
-8. Fairseq Transformer
+### Fairseq Transformer + custom Torch layers
+1. Fairseq Transformer using custom Torch layers
 
-Export Fairseq Transformer models to protobuf/hdf5 format.
+Export Fairseq Transformer models training using custom Torch layers to protobuf/hdf5 format. Refer to the `examples/training/fairseq` directory for more training details.
 ```shell
-python export/fs_transformer_export.py
+python export/fairseq/ls_torch_fs_transformer_export.py -m checkpoint_best.pt
 ```
-9. Fairseq MoE
 
-Export Fairseq MoE models to protobuf/hdf5 format.
+2. Fairseq Transformer using custom Torch layers and PTQ
+
+Export PTQ Fairseq Transformer models training using custom Torch layers to protobuf/hdf5 format. Refer to the `examples/training/fairseq` directory for more training details.
+```shell
+python export/fairseq/ls_torch_fs_transformer_ptq_export.py -m checkpoint_best.pt
+```
+
+3. Quantized Fairseq Transformer using custom Torch layers
+
+Export quantized Fairseq Transformer models training using custom Torch layers to protobuf/hdf5 format. Refer to the `examples/training/fairseq` directory for more training details.
 ```shell
-python export/fs_moe_export.py
+python export/fairseq/ls_torch_fs_quant_transformer_export.py -m checkpoint_best.pt
 ```
 
-### Inference using LightSeq
+## Inference using LightSeq
 1. BART
 ```shell
 python test/ls_bart.py
@@ -78,3 +112,8 @@ python test/ls_bert.py
 ```shell
 python test/ls_gpt2.py
 ```
+
+4. Fairseq based models using LightSeq inference
+```shell
+bash test/ls_fairseq.sh --model ${model_path}
+```
````
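
The test scripts listed above wrap essentially the same verification flow that the export scripts in this commit run after exporting. A minimal Python sketch of that flow, assuming a protobuf model exported by one of the commands above; the file name, batch size, and token IDs are illustrative placeholders taken from the export scripts' defaults:

```python
import lightseq.inference as lsi

# Illustrative values: the export script writes "<model_name>.pb" next to the
# checkpoint, and the source IDs must come from the training vocabulary.
pb_path = "checkpoint_best.pb"
max_batch_size = 8

# Load the exported Transformer and run batched inference on token IDs.
model = lsi.Transformer(pb_path, max_batch_size)
src = [[63, 47, 65, 1507, 88, 74, 10, 2057, 362, 9, 284, 6, 2, 1, 1, 1]]
print(model.infer(src))
```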

examples/inference/python/export/ls_fs_transformer_export.py renamed to examples/inference/python/export/fairseq/ls_fs_transformer_export.py

Lines changed: 26 additions & 11 deletions
```diff
@@ -2,9 +2,10 @@
 Export Fairseq Transformer models training with LightSeq to protobuf/hdf5 format.
 Refer to the `examples/training/fairseq` directory for more training details.
 """
+import argparse
 import torch
 import h5py
-from proto.transformer_pb2 import Transformer
+from export.proto.transformer_pb2 import Transformer
 from lightseq.training import (
     export_ls_config,
     export_ls_embedding,
@@ -60,8 +61,8 @@ def export_ls_fs_transformer(ckpt_path, out_path, save_pb=True):
     else:
         file = h5py.File(out_path, "w")
     encoder_state_dict, decoder_state_dict = _extract_weight(state_dict)
-    export_ls_embedding(file, encoder_state_dict, 1024, True, save_pb)
-    export_ls_embedding(file, decoder_state_dict, 1024, False, save_pb)
+    export_ls_embedding(file, encoder_state_dict, 300, True, save_pb)
+    export_ls_embedding(file, decoder_state_dict, 300, False, save_pb)
     export_ls_encoder(
         file,
         encoder_state_dict,
@@ -81,9 +82,9 @@ def export_ls_fs_transformer(ckpt_path, out_path, save_pb=True):
     export_ls_config(
         file,
         args.encoder_attention_heads,
+        1,
         2,
         2,
-        6,
         args.encoder_layers,
         args.decoder_layers,
         save_pb=save_pb,
@@ -96,19 +97,33 @@ def export_ls_fs_transformer(ckpt_path, out_path, save_pb=True):
         file.close()
 
 
+def parse_args():
+    parser = argparse.ArgumentParser(description="export fairseq checkpoint", usage="")
+    parser.add_argument(
+        "--model",
+        "-m",
+        type=str,
+        default="checkpoint_best.pt",
+        help="path of fairseq checkpoint",
+    )
+    args = parser.parse_args()
+    return args
+
+
 if __name__ == "__main__":
-    ckpt_path = "checkpoint_best.pt"
-    pb_path = "transformer.pb"
-    hdf5_path = "transformer.hdf5"
+    args = parse_args()
+    model_name = ".".join(args.model.split(".")[:-1])
+    pb_path = f"{model_name}.pb"
+    hdf5_path = f"{model_name}.hdf5"
     print("export to pb model >>>>>>")
-    export_ls_fs_transformer(ckpt_path, pb_path)
+    export_ls_fs_transformer(args.model, pb_path)
     print("export to hdf5 model >>>>>>")
-    export_ls_fs_transformer(ckpt_path, hdf5_path, save_pb=False)
-    src = [[63, 47, 65, 1507, 88, 74, 10, 2057, 362, 9, 284, 6, 2]]
+    export_ls_fs_transformer(args.model, hdf5_path, save_pb=False)
+    src = [[63, 47, 65, 1507, 88, 74, 10, 2057, 362, 9, 284, 6, 2, 1, 1, 1]]
     pb_model = lsi.Transformer(pb_path, 8)
     pb_output = pb_model.infer(src)
     hdf5_model = lsi.Transformer(hdf5_path, 8)
     hdf5_output = hdf5_model.infer(src)
-    # Expected result: [23, 550, 34, 118, 148, 2939, 4, 42, 32, 37, 6]
+    # Expected result: [23, 550, 34, 118, 148, 2939, 4, 42, 32, 37, 6, 224, 10, 179, 5, 2]
     print("pb results:", pb_output)
     print("hdf5 results:", hdf5_output)
```

examples/inference/python/export/ls_fs_transformer_ptq_export.py renamed to examples/inference/python/export/fairseq/ls_fs_transformer_ptq_export.py

Lines changed: 25 additions & 10 deletions
```diff
@@ -3,9 +3,10 @@
 and then using int8 quantization to speedup inference.
 Refer to the `examples/training/fairseq` directory for more training details.
 """
+import argparse
 import torch
 import h5py
-from proto.quant_transformer_pb2 import QuantTransformer
+from export.proto.quant_transformer_pb2 import QuantTransformer
 from lightseq.training import (
     export_ls_config,
     export_ls_embedding_ptq,
@@ -47,7 +48,7 @@ def export_fs_weights(file, state_dict, save_pb=True):
     file.trg_embedding.shared_bias[:] = dec_shared_b
 
 
-def export_ls_fs_transformer(ckpt_path, out_path, save_pb=True):
+def export_ls_fs_transformer_ptq(ckpt_path, out_path, save_pb=True):
     with open(ckpt_path, "rb") as fin:
         ckpt_file = torch.load(fin)
         args = ckpt_file["args"]
@@ -58,14 +59,14 @@ def export_ls_fs_transformer(ckpt_path, out_path, save_pb=True):
     export_ls_embedding_ptq(
         file,
         encoder_state_dict,
-        1024,
+        300,
         True,
         save_pb=save_pb,
     )
     export_ls_embedding_ptq(
         file,
         decoder_state_dict,
-        1024,
+        300,
         False,
         save_pb=save_pb,
     )
@@ -90,9 +91,9 @@ def export_ls_fs_transformer(ckpt_path, out_path, save_pb=True):
     export_ls_config(
         file,
         args.encoder_attention_heads,
+        1,
         2,
         2,
-        6,
         args.encoder_layers,
         args.decoder_layers,
         save_pb=save_pb,
@@ -102,13 +103,27 @@ def export_ls_fs_transformer(ckpt_path, out_path, save_pb=True):
         fout.write(file.SerializeToString())
 
 
+def parse_args():
+    parser = argparse.ArgumentParser(description="export fairseq checkpoint", usage="")
+    parser.add_argument(
+        "--model",
+        "-m",
+        type=str,
+        default="checkpoint_best.pt",
+        help="path of fairseq checkpoint",
+    )
+    args = parser.parse_args()
+    return args
+
+
 if __name__ == "__main__":
-    ckpt_path = "checkpoint_best.pt"
-    pb_path = "quant_transformer.pb"
+    args = parse_args()
+    model_name = ".".join(args.model.split(".")[:-1])
+    pb_path = f"{model_name}_ptq.pb"
     print("export to pb model >>>>>>")
-    export_ls_fs_transformer(ckpt_path, pb_path)
-    src = [[63, 47, 65, 1507, 88, 74, 10, 2057, 362, 9, 284, 6, 2]]
+    export_ls_fs_transformer_ptq(args.model, pb_path)
+    src = [[63, 47, 65, 1507, 88, 74, 10, 2057, 362, 9, 284, 6, 2, 1, 1, 1]]
     pb_model = lsi.QuantTransformer(pb_path, 8)
     pb_output = pb_model.infer(src)
-    # FP16 result: [23, 550, 34, 118, 148, 2939, 4, 42, 32, 37, 6]
+    # FP16 result: [23, 550, 34, 118, 148, 2939, 4, 42, 32, 37, 6, 224, 10, 179, 5, 2]
     print("pb results:", pb_output)
```