[WIP] Support target features #2227

Closed. Wants to merge 62 commits.

Commits:
- `ff6a605` Added target features support to build_vocab (anderleich, Oct 24, 2022)
- `0e5cc73` Merge branch 'v3.0' into support_target_features (anderleich, Oct 24, 2022)
- `e42c209` make prefetch_factor configurable in DataLoader (l-k-11235, Oct 26, 2022)
- `ddf5ddf` bucket size ramp up (l-k-11235, Nov 3, 2022)
- `24c0c4f` Merge pull request #2239 from vince62s/v3.0 (vince62s, Nov 3, 2022)
- `69e09db` bucket size ramp up is configurable (l-k-11235, Nov 3, 2022)
- `3f86f7d` more docs update (vince62s, Nov 3, 2022)
- `2dda423` changelog (vince62s, Nov 3, 2022)
- `f4cbb03` readme (vince62s, Nov 3, 2022)
- `ffac0bc` Merge pull request #2240 from vince62s/docs (vince62s, Nov 3, 2022)
- `9b79dfb` fixed flake error (l-k-11235, Nov 4, 2022)
- `a981b43` fixed unit test error (l-k-11235, Nov 4, 2022)
- `06232c0` Better docs (vince62s, Nov 4, 2022)
- `7f8d26a` typo (vince62s, Nov 4, 2022)
- `4584bfe` forgot to add moved modules (vince62s, Nov 4, 2022)
- `51b99f0` onmt_server works with ctranslate2 (Ehsan-Jahanbakhsh, Nov 6, 2022)
- `21338bf` Fixed Indet. (Ehsan-Jahanbakhsh, Nov 7, 2022)
- `8132c8a` Merge pull request #2245 from Ehsan-Jahanbakhsh/ctranslate2server (vince62s, Nov 7, 2022)
- `793cba1` Add CTranslate2 in requirements (guillaumekln, Sep 16, 2022)
- `0937d36` Merge pull request #2247 from guillaumekln/add-ct2-requirement (vince62s, Nov 7, 2022)
- `bcded17` add comment for LM upgrade (vince62s, Nov 8, 2022)
- `d30366e` Merge pull request #2241 from vince62s/betterdoc (vince62s, Nov 8, 2022)
- `f8e69a2` Merge branch 'master' into facilitate-dataloading-optimization (l-k-11235, Nov 10, 2022)
- `168f8df` fix LM_scoring with v3 (vince62s, Nov 11, 2022)
- `13ad153` Merge pull request #2248 from vince62s/fixlmscoring (vince62s, Nov 11, 2022)
- `33b9110` LMprior with CT2 to infer LM model (vince62s, Nov 15, 2022)
- `2c756f8` simpler fix for special tokens order (vince62s, Nov 15, 2022)
- `27c51fb` back to list for consistent order (vince62s, Nov 15, 2022)
- `77e7ae7` moved comments in the docstring of build_dynamic_dataset_iter (l-k-11235, Nov 15, 2022)
- `7de309a` Merge pull request #2246 from l-k-11235/facilitate-dataloading-optimi… (vince62s, Nov 15, 2022)
- `0df961f` better stats (vince62s, Nov 17, 2022)
- `c02029a` add onmt LM (vince62s, Nov 18, 2022)
- `8204f5e` LMprior working fine (vince62s, Nov 22, 2022)
- `9be00db` Merge pull request #2252 from vince62s/lmpriorv3 (vince62s, Nov 22, 2022)
- `5686adc` Fix dynamic scoring (#2253) (l-k-11235, Nov 22, 2022)
- `0373bd2` reinstate apex.amp (O1 O2) (#2220) (#2256) (vince62s, Nov 23, 2022)
- `3c4f6bc` v3.0.1 (#2257) (vince62s, Nov 23, 2022)
- `d61f22c` Update Translation.md (vince62s, Dec 1, 2022)
- `2eaeed3` Fix tensorboard logging (#2260) (l-k-11235, Dec 2, 2022)
- `dd28db1` Fix validation scoring (#2263) (l-k-11235, Dec 2, 2022)
- `08d2b99` fixes (#2265) (vince62s, Dec 3, 2022)
- `cadd99c` revisit tgt_prefix (#2267) (vince62s, Dec 6, 2022)
- `70799ae` Optimize validation scoring (#2266) (l-k-11235, Dec 7, 2022)
- `874e18a` Bucket processing (#2261) (l-k-11235, Dec 7, 2022)
- `9698acd` pickable Vocab / v3.0.2 (#2268) (vince62s, Dec 7, 2022)
- `9d617b8` Use native CrossEntropyLoss including label_smoothing + more optimisa… (vince62s, Dec 9, 2022)
- `b430e24` fixed coverage attention and translator for attn_debug (#2272) (sanghyuk-choi, Dec 15, 2022)
- `7ccfd23` Fix detok in scoring utils (#2271) (l-k-11235, Dec 15, 2022)
- `ff9effd` fix no tgt at inference (#2273) (vince62s, Dec 16, 2022)
- `386e9be` keep Label Smoothing for Validation (same as Train) (#2274) (vince62s, Dec 16, 2022)
- `3b7c92b` revert approx normalization to accurate per item (#2275) (vince62s, Dec 16, 2022)
- `5f32750` Bump 3.0.3 (#2277) (vince62s, Dec 19, 2022)
- `0ed5dac` Wmt17 example (#2278) (vince62s, Dec 20, 2022)
- `6c70fc4` better batching (#2279) (vince62s, Dec 20, 2022)
- `f9c7ac8` fixes (#2281) (vince62s, Dec 23, 2022)
- `3ea1b66` fix LM scoring (#2284) (vince62s, Dec 31, 2022)
- `f64ac05` emb dropout in all cases (#2285) (vince62s, Jan 3, 2023)
- `edf4140` fix bad mistake in lm prior (#2286) (vince62s, Jan 5, 2023)
- `156f646` Merge remote-tracking branch 'upstream/master' into support_target_fe… (anderleich, Jan 6, 2023)
- `8b5600f` Update comment (anderleich, Jan 6, 2023)
- `a8e6fe6` Add target features support to training part (anderleich, Jan 7, 2023)
- `471e12c` Merge branch 'v3.0' into support_target_features (anderleich, Jan 7, 2023)
73 changes: 69 additions & 4 deletions .github/workflows/push.yml
@@ -78,8 +78,11 @@ jobs:
-word_vec_size 5 \
-report_every 5\
-hidden_size 10 \
-train_steps 10
- name: Test RNN training with copy
-train_steps 10 \
-tensorboard "true" \
-tensorboard_log_dir /tmp/logs_train
python onmt/tests/test_events.py --logdir /tmp/logs_train -tensorboard_checks train
- name: Test RNN training and validation with copy
run: |
python train.py \
-config data/data.yaml \
@@ -93,8 +96,11 @@
-word_vec_size 5 \
-report_every 5 \
-hidden_size 10 \
-train_steps 10 \
-train_steps 10 -valid_steps 5 \
-tensorboard "true" \
-tensorboard_log_dir /tmp/logs_train_valid \
-copy_attn
python onmt/tests/test_events.py --logdir /tmp/logs_train_valid -tensorboard_checks train_valid
- name: Test RNN training with coverage
run: |
python train.py \
@@ -116,7 +122,6 @@
-tgt_vocab /tmp/onmt.vocab.tgt \
-src_vocab_size 1000 \
-tgt_vocab_size 1000 \
-max_generator_batches 0 \
-encoder_type transformer \
-decoder_type transformer \
-layers 4 \
@@ -133,6 +138,66 @@
-attention_dropout 0.2 0.1 0.1 \
-report_every 5 \
-train_steps 10
- name : Test Transformer training with dynamic scoring
run: |
python3 train.py \
-config data/data.yaml \
-src_vocab /tmp/onmt.vocab.src \
-tgt_vocab /tmp/onmt.vocab.tgt \
-src_vocab_size 1000 \
-tgt_vocab_size 1000 \
-encoder_type transformer \
-decoder_type transformer \
-layers 4 \
-word_vec_size 16 \
-hidden_size 16 \
-num_workers 0 -bucket_size 1024 \
-heads 2 \
-transformer_ff 64 \
-num_workers 0 -bucket_size 1024 \
-accum_count 2 4 8 \
-accum_steps 0 15000 30000 \
-save_model /tmp/onmt.model \
-train_steps 20 \
-report_every 5 \
-train_eval_steps 10 \
-train_metrics "BLEU" "TER" \
-tensorboard "true" \
-scoring_debug "true" \
-tensorboard_log_dir /tmp/logs_train_metrics \
-dump_preds /tmp/dump_preds
python onmt/tests/test_events.py --logdir /tmp/logs_train_metrics -tensorboard_checks train_metrics
- name : Test Transformer training and validation with dynamic scoring and copy
run: |
python3 train.py \
-config data/data.yaml \
-src_vocab /tmp/onmt.vocab.src \
-tgt_vocab /tmp/onmt.vocab.tgt \
-src_vocab_size 1000 \
-tgt_vocab_size 1000 \
-encoder_type transformer \
-decoder_type transformer \
-layers 4 \
-word_vec_size 16 \
-hidden_size 16 \
-num_workers 0 -bucket_size 1024 \
-heads 2 \
-transformer_ff 64 \
-num_workers 0 -bucket_size 1024 \
-accum_count 2 4 8 \
-accum_steps 0 15000 30000 \
-save_model /tmp/onmt.model \
-train_steps 10 -valid_steps 5 \
-report_every 2 \
-train_eval_steps 8 \
-train_metrics "BLEU" "TER" \
-valid_metrics "BLEU" "TER" \
-tensorboard "true" \
-scoring_debug "true" \
-tensorboard_log_dir /tmp/logs_train_valid_metrics \
-dump_preds /tmp/dump_preds \
-copy_attn
python onmt/tests/test_events.py --logdir /tmp/logs_train_valid_metrics -tensorboard_checks train_valid_metrics
- name: Test LM training
run: |
python train.py \
34 changes: 34 additions & 0 deletions CHANGELOG.md
@@ -3,6 +3,40 @@


## [Unreleased]
## [3.0.3](https://github.com/OpenNMT/OpenNMT-py/tree/3.0.3) (2022-12-16)
* fix loss normalization when using gradient accumulation or more than 1 GPU
* use native CrossEntropyLoss with label smoothing; reported loss/ppl is impacted by label smoothing
* fix long-standing coverage loss bug (thanks to Sanghyuk-Choi)
* fix detokenization at scoring / fix tokenization with subword-nmt + SentencePiece
* various small bug fixes
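For intuition on the "accurate per item" normalization mentioned above, here is a minimal sketch (not the actual OpenNMT-py code) contrasting a per-batch approximation with one global normalization over all tokens accumulated across micro-batches:

```python
# Sketch: two accumulated micro-batches with different token counts.
losses = [12.0, 20.0]   # summed (unreduced) loss per micro-batch
num_tokens = [4, 8]     # target tokens per micro-batch

# Approximate: mean of per-batch means, biased when batch sizes differ.
approx = sum(l / n for l, n in zip(losses, num_tokens)) / len(losses)   # 2.75

# Accurate per item: one normalization over all accumulated tokens.
accurate = sum(losses) / sum(num_tokens)   # 32 / 12, about 2.667
```

The two values differ whenever the micro-batches are unequal in size, which is why the approximation was reverted.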

## [3.0.2](https://github.com/OpenNMT/OpenNMT-py/tree/3.0.2) (2022-12-07)
* pyonmttok.Vocab is now picklable; the dataloader switched to spawn (MacOS/Windows compatible)
* fix scoring with specific metrics (BLEU, TER)
* fix tensorboard logging
* fix dedup in the batch iterator (now only for TRAIN; it was also happening at inference)
* Change: tgt_prefix renamed to tgt_file_prefix
* New: tgt_prefix / src_prefix used for the "prefix" transform (onmt/transforms/misc.py)
* New: transforms are applied to buckets in batches (vs per example), which is faster

## [3.0.1](https://github.com/OpenNMT/OpenNMT-py/tree/3.0.1) (2022-11-23)

* fix dynamic scoring
* reinstate apex.amp level O1/O2 for benchmarking
* New: LM distillation for NMT training
* New: bucket_size ramp-up to avoid a slow start
* fix special tokens order
* remove the Library section and add a link to Yasmin's tutorial

## [3.0.0](https://github.com/OpenNMT/OpenNMT-py/tree/3.0.0) (2022-11-3)

* Completely removed torchtext; use the [Vocab object of pyonmttok](https://github.com/OpenNMT/Tokenizer/tree/master/bindings/python#vocabulary) instead
* Data loading changed accordingly, using the PyTorch DataLoader (num_workers)
* queue_size / pool_factor are no longer needed; the optimal bucket_size value is > 64K
* options renamed: rnn_size => hidden_size (enc/dec_rnn_size => enc/dec_hid_size)
* new tools/convertv2_v3.py to upgrade v2 models.pt
* inference with length_penalty=avg is now the default
* add_qkvbias (default false, but true for old models)

## [2.3.0](https://github.com/OpenNMT/OpenNMT-py/tree/2.3.0) (2022-09-14)

36 changes: 30 additions & 6 deletions README.md
@@ -16,6 +16,11 @@ Unless there is a bug, please use the [forum](https://forum.opennmt.net) or [Git

----

There is a new step-by-step tutorial (thanks to Yasmin Moslem):
please read and/or follow it before raising beginner issues: [Tutorial](https://github.com/ymoslem/OpenNMT-Tutorial)

----

# OpenNMT-py 3.0

**We're happy to announce the release v3.0 of OpenNMT-py.**
@@ -52,14 +57,34 @@ If you want to optimize the training performance:

### Breaking changes

A few features were dropped between v1 and v2:
Changes between v2 and v3:

Options removed:
`queue_size` and `pool_factor` are no longer needed. Just adjust `bucket_size` to the number of examples to be loaded by each of the PyTorch DataLoader's `num_workers`.

New options:
`num_workers`: number of workers for each process. The recommended value is 4 when running on a single GPU, and 2 when running on more than one GPU.
`add_qkvbias`: default is false; however, old models trained with v2 will have it set to true. The original Transformer paper used no bias for the Q/K/V nn.Linear layers of the multi-head attention module.

Options renamed:
`rnn_size` => `hidden_size`
`enc_rnn_size` => `enc_hid_size`
`dec_rnn_size` => `dec_hid_size`

Note: `tools/convertv2_v3.py` will modify these options stored in the checkpoint to make things compatible with v3.0
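As a rough illustration of the rename, here is a minimal sketch of the kind of key remapping such a conversion script could apply to the options stored in a checkpoint (a hypothetical sketch, not the actual tools/convertv2_v3.py code, which may do more, such as setting add_qkvbias for old models):

```python
# Hypothetical sketch: rename v2 option keys to their v3 names.
V2_TO_V3 = {
    "rnn_size": "hidden_size",
    "enc_rnn_size": "enc_hid_size",
    "dec_rnn_size": "dec_hid_size",
}

def upgrade_opts(opts: dict) -> dict:
    """Return a copy of the option dict with v2 keys renamed to v3."""
    return {V2_TO_V3.get(key, key): value for key, value in opts.items()}

old = {"rnn_size": 512, "enc_rnn_size": 512, "dec_rnn_size": 512, "layers": 6}
new = upgrade_opts(old)
print(new)  # {'hidden_size': 512, 'enc_hid_size': 512, 'dec_hid_size': 512, 'layers': 6}
```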

Inference:
The translator now uses the same dynamic iterator as the trainer.
The new default for inference is `length_penalty=avg`, which provides better BLEU scores in most cases (and is comparable to other toolkits' defaults).
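For intuition, the `avg` length penalty is conventionally the cumulative log-probability divided by the hypothesis length; a minimal sketch, assuming that conventional definition rather than OpenNMT-py's exact implementation:

```python
def avg_penalty_score(cum_logprob: float, length: int) -> float:
    """Score a beam hypothesis by its average log-probability per token."""
    return cum_logprob / length

# Without normalization the shorter hypothesis (-4.0 total) beats the
# longer one (-7.0 total); with avg normalization the longer one wins
# because it is better per token, reducing the bias toward short outputs.
shorter = avg_penalty_score(-4.0, 4)    # -1.0 per token
longer = avg_penalty_score(-7.0, 10)    # -0.7 per token
assert longer > shorter
```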



Reminder: a few features were dropped between v1 and v2:

- audio, image and video inputs;

For any user who still needs these features, the previous codebase is retained as `legacy` in a separate branch. It will no longer receive extensive development from the core team, but PRs may still be accepted.

- For inference, we default to length_penalty: avg which usually gives better BLEU and is comparable to other toolkits.

Feel free to check it out and let us know what you think of the new paradigm!

----
@@ -79,7 +104,7 @@ Table of Contents

OpenNMT-py requires:

- Python >= 3.6
- Python >= 3.7
- PyTorch >= 1.9.0

Install `OpenNMT-py` from `pip`:
@@ -104,8 +129,7 @@ pip install -r requirements.opt.txt

## Features

- :warning: **New in OpenNMT-py 2.0**: [On the fly data processing]([here](https://opennmt.net/OpenNMT-py/FAQ.html#what-are-the-readily-available-on-the-fly-data-transforms).)

- [On the fly data processing](https://opennmt.net/OpenNMT-py/FAQ.html#what-are-the-readily-available-on-the-fly-data-transforms)
- [Encoder-decoder models with multiple RNN cells (LSTM, GRU) and attention types (Luong, Bahdanau)](https://opennmt.net/OpenNMT-py/options/train.html#model-encoder-decoder)
- [Transformer models](https://opennmt.net/OpenNMT-py/FAQ.html#how-do-i-use-the-transformer-model)
- [Copy and Coverage Attention](https://opennmt.net/OpenNMT-py/options/train.html#model-attention)
2 changes: 0 additions & 2 deletions config/config-transformer-base-1GPU.yml
@@ -30,8 +30,6 @@ normalization: tokens
dropout: 0.1
label_smoothing: 0.1

max_generator_batches: 2

param_init: 0.0
param_init_glorot: 'true'
position_encoding: 'true'
2 changes: 0 additions & 2 deletions config/config-transformer-base-4GPU.yml
@@ -30,8 +30,6 @@ normalization: tokens
dropout: 0.1
label_smoothing: 0.1

max_generator_batches: 2

param_init: 0.0
param_init_glorot: 'true'
position_encoding: 'true'
2 changes: 1 addition & 1 deletion data/README.md
@@ -4,4 +4,4 @@

> python preprocess.py -train_src data/src-train.txt -train_tgt data/tgt-train.txt -valid_src data/src-val.txt -valid_tgt data/tgt-val.txt -save_data data/data -src_vocab_size 1000 -tgt_vocab_size 1000

> python train.py -data data/data -save_model /n/rush_lab/data/tmp_ -world_size 1 -gpu_ranks 0 -rnn_size 100 -word_vec_size 50 -layers 1 -train_steps 100 -optim adam -learning_rate 0.001
> python train.py -data data/data -save_model /n/rush_lab/data/tmp_ -world_size 1 -gpu_ranks 0 -hidden_size 100 -word_vec_size 50 -layers 1 -train_steps 100 -optim adam -learning_rate 0.001