Skip to content

Commit ea41d81

Browse files
Ubuntu authored and Ubuntu committed
maybe bugfix
1 parent 36f551a commit ea41d81

8 files changed

+18
-15
lines changed

Diff for: configs/attn_lstm_vocab_1k.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name: attn_lstm_vocab_1k
22
train:
3-
batch_size: 512
3+
batch_size: 256
44
LOAD_EPOCH:
55
epochs: 3
66
num_workers: 6

Diff for: configs/attn_lstm_vocab_50k.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
name: attn_lstm_vocab_50k
22
train:
3-
batch_size: 128
3+
batch_size: 64
44
LOAD_EPOCH:
5-
epochs: 6
5+
epochs: 1
66
num_workers: 6
77
eval_period: 1
88
checkpoint_period: 1

Diff for: configs/pointer_vocab_50k.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
name: pointer_vocab_50k
22
train:
3-
batch_size: 128
3+
batch_size: 64
44
LOAD_EPOCH:
5-
epochs: 6
5+
epochs: 1
66
num_workers: 6
77
eval_period: 1
88
checkpoint_period: 1

Diff for: configs/simple_lstm_vocab_50k.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
name: simple_lstm_vocab_50k
22
train:
3-
batch_size: 256
3+
batch_size: 128
44
LOAD_EPOCH:
5-
epochs: 5
5+
epochs: 1
66
num_workers: 6
77
eval_period: 1
88
checkpoint_period: 1

Diff for: model.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ def forward(
336336
# cond = (t_tensor[:, iter] < self.vocab_sizeT + self.attn_size).long()
337337
# masked_target = cond * t_tensor[:, iter] + (1 - cond) * self.eof_T_id
338338
target = t_tensor[:, iter]
339-
target[target >= output.shape[1]] = self.eof_T_id # ignored index
339+
target[target >= output.shape[1]] = self.unk_id
340340
token_losses[:, iter] = self.criterion(output, t_tensor[:, iter].clone().detach())
341341

342342
loss = token_losses.sum() #/ batch_size

Diff for: run.sh

+7-6
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
python3 train.py --config=configs/pointer_vocab_10k.yml
2-
python3 train.py --config=configs/pointer_vocab_50k.yml
3-
python3 train.py --config=configs/attn_lstm_vocab_1k.yml
4-
python3 train.py --config=configs/attn_lstm_vocab_50k.yml
5-
python3 train.py --config=configs/simple_lstm_vocab_1k.yml
6-
python3 train.py --config=configs/simple_lstm_vocab_50k.yml
1+
rm -r logs/pointer_vocab_50k
2+
# python3 train.py --config=configs/pointer_vocab_10k.yml
3+
python train.py --config=configs/pointer_vocab_50k.yml
4+
python train.py --config=configs/attn_lstm_vocab_1k.yml
5+
python train.py --config=configs/attn_lstm_vocab_50k.yml
6+
#python3 train.py --config=configs/simple_lstm_vocab_1k.yml
7+
python train.py --config=configs/simple_lstm_vocab_50k.yml

Diff for: train.py

+1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ def train(config):
4747
)
4848

4949
ignored_index = data_train.vocab_sizeT - 1
50+
unk_index = data_train.vocab_sizeT - 2
5051

5152
model = MixtureAttention(
5253
hidden_size = config.model.hidden_size,

Diff for: utils.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,10 @@ def forward(self, output, target):
3232

3333
return F.kl_div(output, model_prob, reduction='sum')
3434

35-
def accuracy(out, target, ignored_index):
35+
def accuracy(out, target, ignored_index, unk_index):
3636
out_ = out[target != ignored_index]
3737
target_ = target[target != ignored_index]
38+
out_ = out_[out_ == unk_index] = -1
3839
return accuracy_score(out_, target_)
3940

4041
class DotDict(dict):

0 commit comments

Comments (0)