#52: debug trainer finished
cjopengler committed Nov 11, 2021
1 parent 3dc0242 commit 5e9897b
Showing 7 changed files with 20 additions and 16 deletions.
9 changes: 6 additions & 3 deletions data/mrc_ner/config/config.json
@@ -16,6 +16,7 @@
     "__name_space__": "mrc_ner",
     "tokenizer": {
         "__type__": "BertTokenizer",
+        "__name_space__": "__easytext__",
         "bert_dir": "/Users/panxu/MyProjects/github/easytext/data/pretrained/bert/chinese_roberta_wwm_large_ext_pytorch"
     },
@@ -25,7 +26,7 @@
     "model": {
-        "__type__": "BertRnnWithCrf",
+        "__type__": "MRCNer",
         "__name_space__": "mrc_ner",
         "bert_dir": "/Users/panxu/MyProjects/github/easytext/data/pretrained/bert/chinese_roberta_wwm_large_ext_pytorch",
         "dropout": 0.1
@@ -52,13 +53,15 @@
         "weight_decay": 0.01
     },

-    "lr_scheduler": {
+    "#lr_scheduler": {
         "__type__": "MRCLrScheduler",
         "__name_space__": "mrc_ner",
         "max_lr": 0.00002,
         "final_div_factor": 10000,
-        "total_steps": null,
+        "total_steps": null
     },

+    "lr_scheduler": null,
+    "grad_rescaled": null,

     "process_group_parameter": {
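The commented-out block (the "#lr_scheduler" key) together with the explicit "lr_scheduler": null suggests the trainer now runs without a scheduler and treats a null component as disabled. A minimal sketch of that convention, assuming a hypothetical load_component helper (not easytext's actual loader API):

    import json

    def load_component(config: dict, key: str):
        # assumption: a null entry means "component disabled";
        # a real loader would dispatch on spec["__type__"] and spec["__name_space__"]
        spec = config.get(key)
        if spec is None:
            return None
        return spec

    config = json.loads("""
    {
        "#lr_scheduler": {"__type__": "MRCLrScheduler", "max_lr": 0.00002},
        "lr_scheduler": null
    }
    """)

    assert load_component(config, "lr_scheduler") is None   # scheduler disabled
    assert "#lr_scheduler" in config                        # old settings kept as an inert entry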
8 changes: 4 additions & 4 deletions mrc/data/bert_model_collate.py
@@ -89,7 +89,7 @@ def __call__(self, instances: List[Instance]) -> MRCModelInputs:
             metadata["positions"] = zip(start_positions, end_positions)

             start_positions = [(query_offset + start_position) for start_position in start_positions]
-            start_position_labels = torch.zeros(batch_max_length)
+            start_position_labels = torch.zeros(batch_max_length, dtype=torch.long)

             for start_position in start_positions:
                 if start_position < batch_max_length - 1:
@@ -98,17 +98,17 @@ def __call__(self, instances: List[Instance]) -> MRCModelInputs:
             batch_start_position_labels.append(start_position_labels)

             end_positions = [(query_offset + end_position) for end_position in end_positions]
-            end_position_labels = torch.zeros(batch_max_length)
+            end_position_labels = torch.zeros(batch_max_length, dtype=torch.long)

             for end_position in end_positions:

                 if end_position < batch_max_length - 1:
                     end_position_labels[end_position] = 1

-            batch_end_position_labels.append(torch.tensor(end_position_labels, dtype=torch.long))
+            batch_end_position_labels.append(end_position_labels)

             # match position
-            match_positions = torch.zeros(size=(batch_max_length, batch_max_length))
+            match_positions = torch.zeros(size=(batch_max_length, batch_max_length), dtype=torch.long)

             for start_position, end_position in zip(start_positions, end_positions):
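All three fixes in this file share one theme: allocate the label tensors with dtype=torch.long up front instead of re-wrapping a finished tensor in torch.tensor(...), which copies the data and raises a UserWarning on recent PyTorch. A minimal, self-contained sketch of the new pattern (values are illustrative):

    import torch

    batch_max_length = 8
    start_positions = [2, 5]

    # torch.zeros defaults to float32; request integer labels directly
    start_position_labels = torch.zeros(batch_max_length, dtype=torch.long)
    for start_position in start_positions:
        if start_position < batch_max_length - 1:
            start_position_labels[start_position] = 1

    # the old code did: torch.tensor(start_position_labels, dtype=torch.long),
    # i.e. an extra copy plus a "To copy construct from a tensor..." warning
    assert start_position_labels.dtype == torch.long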
1 change: 1 addition & 0 deletions mrc/label_decoder/mrc_label_index_decoder.py
@@ -28,6 +28,7 @@ def __call__(self,
                  match_logits: torch.Tensor,
                  mask: torch.BoolTensor) -> torch.LongTensor:

+        mask = mask.bool()
         batch_size, seq_len = start_logits.size()

         # match label pred, [batch_size, seq_len, seq_len]
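Normalizing the mask with mask.bool() matters because upstream code may hand over a 0/1 long tensor, and indexing with a long tensor silently means something different from masking. A small sketch of the hazard the cast avoids (tensors are illustrative):

    import torch

    scores = torch.tensor([10.0, 20.0, 30.0])
    mask = torch.tensor([1, 1, 0])        # 0/1 long mask, as a collate might produce

    # a long "mask" is interpreted as gather indices: picks rows 1, 1, 0 -- wrong
    assert scores[mask].tolist() == [20.0, 20.0, 10.0]

    # a bool mask selects the unmasked entries, as intended
    assert scores[mask.bool()].tolist() == [10.0, 20.0]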
6 changes: 3 additions & 3 deletions mrc/loss/mrc_bce_loss.py
@@ -40,15 +40,15 @@ def __call__(self, model_outputs: MRCNerOutput, golden_label: Dict[str, torch.Tensor]

         batch_size, sequence_length = model_outputs.start_logits.size()

-        start_loss = self.loss(model_outputs.start_logits, golden_label["start_position_labels"])
+        start_loss = self.loss(model_outputs.start_logits, golden_label["start_position_labels"].float())
         # compute the mean
         start_loss = (start_loss * mask).sum() / mask.sum()

-        end_loss = self.loss(model_outputs.end_logits, golden_label["end_position_labels"])
+        end_loss = self.loss(model_outputs.end_logits, golden_label["end_position_labels"].float())
         end_loss = (end_loss * mask).sum() / mask.sum()

         match_loss = self.loss(model_outputs.match_logits.view(batch_size, -1),
-                               golden_label["match_position_labels"].view(batch_size, -1))
+                               golden_label["match_position_labels"].float().view(batch_size, -1))

         match_label_row_mask = mask.bool().unsqueeze(-1).expand(-1, -1, sequence_length)
         match_label_col_mask = mask.bool().unsqueeze(-2).expand(-1, sequence_length, -1)
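The .float() casts follow from the collate change above: the labels are now long tensors, but BCEWithLogitsLoss requires float targets. A minimal sketch of the masked-mean pattern used here, assuming self.loss wraps nn.BCEWithLogitsLoss(reduction="none") (the element-wise mask multiply only works with an unreduced loss):

    import torch
    from torch import nn

    loss_fn = nn.BCEWithLogitsLoss(reduction="none")

    logits = torch.randn(2, 4)
    labels = torch.randint(0, 2, (2, 4))          # long labels from the collate
    mask = torch.tensor([[1, 1, 1, 0],
                         [1, 1, 0, 0]])

    per_token = loss_fn(logits, labels.float())   # long targets raise a RuntimeError
    masked_mean = (per_token * mask).sum() / mask.sum()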
2 changes: 1 addition & 1 deletion mrc/metric/mrc_f1_metric.py
@@ -43,7 +43,7 @@ def __call__(self, prediction_match_labels: torch.Tensor, gold_match_labels: torch.Tensor
         :param mask: mask
         :return: metric dict
         """
-
+        mask = mask.bool()
         batch_size, seq_length = mask.size()

         match_label_mask = (mask.unsqueeze(-1).expand(-1, -1, seq_length)
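With the mask as torch.bool, the lines that follow expand the 1D token mask into the [batch_size, seq_length, seq_length] span mask. A standalone sketch of that broadcasting step; the final triu filter (keep only spans with start <= end) is the usual MRC-NER convention and an assumption here:

    import torch

    mask = torch.tensor([[1, 1, 0]]).bool()                # [batch, seq]
    seq_length = mask.size(-1)

    row = mask.unsqueeze(-1).expand(-1, -1, seq_length)    # start token unmasked
    col = mask.unsqueeze(-2).expand(-1, seq_length, -1)    # end token unmasked
    match_label_mask = row & col                           # [batch, seq, seq]
    match_label_mask = torch.triu(match_label_mask, diagonal=0)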
6 changes: 3 additions & 3 deletions mrc/metric/mrc_metric.py
@@ -36,18 +36,18 @@ def __init__(self):
         self.model_label_decoder = MRCModelLabelDecoder()
         self.mrc_f1_metric = MRCF1Metric(labels=list())

-    def __call__(self, model_outputs: MRCNerOutput, golden_label_dict: Dict[str, Tensor]) -> Tuple[Dict, ModelTargetMetric]:
+    def __call__(self, model_outputs: MRCNerOutput, golden_labels: Dict[str, Tensor]) -> Tuple[Dict, ModelTargetMetric]:
         """
         compute the metric
         :param model_outputs:
-        :param golden_label_dict: start_position_labels, end_position_labels, batch_match_positions
+        :param golden_labels: start_position_labels, end_position_labels, batch_match_positions
         :return:
         """
         model_outputs: MRCNerOutput = model_outputs

         match_prediction_labels = self.model_label_decoder.decode_label_index(model_outputs=model_outputs)

-        match_golden_labels = golden_label_dict["match_position_labels"]
+        match_golden_labels = golden_labels["match_position_labels"]

         # compute overall F1
         mask = model_outputs.mask.detach()
4 changes: 2 additions & 2 deletions mrc/optimizer/mrc_optimizer.py
@@ -29,11 +29,11 @@ def create(self, model: Model) -> "Optimizer":
         no_decay = ["bias", "LayerNorm.weight"]
         parameters = [
             {
-                "params": [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)],
+                "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
                 "weight_decay": self.weight_decay,
             },
             {
-                "params": [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)],
+                "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
                 "weight_decay": 0.0,
             }
         ]
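The fix replaces self.model with the model argument that create() actually receives. The grouping itself is the standard BERT recipe: no weight decay on biases or LayerNorm weights. A runnable sketch with a stand-in module (TinyModel is illustrative, not part of the repo):

    import torch
    from torch import nn
    from torch.optim import AdamW

    class TinyModel(nn.Module):
        def __init__(self):
            super().__init__()
            self.encoder = nn.Linear(4, 4)
            self.LayerNorm = nn.LayerNorm(4)   # named so the filter below matches

    model = TinyModel()
    weight_decay = 0.01

    no_decay = ["bias", "LayerNorm.weight"]
    parameters = [
        {   # decayed: everything except biases and LayerNorm weights
            "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
            "weight_decay": weight_decay,
        },
        {   # not decayed
            "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
    ]
    optimizer = AdamW(parameters, lr=2e-5)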
