|
| 1 | +import torch |
| 2 | +from torch import nn |
| 3 | +import json |
| 4 | +import os |
| 5 | +from ..base_model import BaseModel |
| 6 | +from ..utils import PropertyPredictionOutput, KnowledgePredictionOutput |
| 7 | +from transformers import PretrainedConfig |
| 8 | +from typing import List |
| 9 | +from ..rnn.harnn import HAM |
| 10 | +from transformers import BartConfig as JiuzhangConfig |
| 11 | +from .modeling import CPTModel as JiuzhangModel |
| 12 | + |
| 13 | + |
| 14 | +__all__ = ["JiuzhangForPropertyPrediction", "JiuzhangForKnowledgePrediction"] |
| 15 | + |
| 16 | + |
| 17 | +class JiuzhangForPropertyPrediction(BaseModel): |
| 18 | + def __init__(self, pretrained_model_dir=None, head_dropout=0.5, init=True): |
| 19 | + super(JiuzhangForPropertyPrediction, self).__init__() |
| 20 | + jiuzhang_config = JiuzhangConfig.from_pretrained(pretrained_model_dir) |
| 21 | + if init: |
| 22 | + print(f'Load Jiuzhang from checkpoint: {pretrained_model_dir}') |
| 23 | + self.jiuzhang = JiuzhangModel.from_pretrained(pretrained_model_dir, ignore_mismatched_sizes=True) |
| 24 | + else: |
| 25 | + print(f'Load Jiuzhang from config: {pretrained_model_dir}') |
| 26 | + self.jiuzhang = JiuzhangModel(jiuzhang_config) |
| 27 | + self.hidden_size = self.jiuzhang.config.hidden_size |
| 28 | + self.head_dropout = head_dropout |
| 29 | + self.dropout = nn.Dropout(head_dropout) |
| 30 | + self.classifier = nn.Linear(self.hidden_size, 1) |
| 31 | + self.sigmoid = nn.Sigmoid() |
| 32 | + self.criterion = nn.MSELoss() |
| 33 | + |
| 34 | + self.config = {k: v for k, v in locals().items() if k not in ["self", "__class__", "jiuzhang_config"]} |
| 35 | + self.config['architecture'] = 'JiuzhangForPropertyPrediction' |
| 36 | + self.config = PretrainedConfig.from_dict(self.config) |
| 37 | + |
| 38 | + def forward(self, |
| 39 | + input_ids=None, |
| 40 | + attention_mask=None, |
| 41 | + token_type_ids=None, |
| 42 | + labels=None): |
| 43 | + outputs = self.jiuzhang(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids) |
| 44 | + # outputs = self.jiuzhang(input_ids=input_ids, attention_mask=attention_mask) |
| 45 | + item_embeds = outputs.last_hidden_state[:, 0, :] |
| 46 | + item_embeds = self.dropout(item_embeds) |
| 47 | + |
| 48 | + logits = self.sigmoid(self.classifier(item_embeds)).squeeze(1) |
| 49 | + loss = None |
| 50 | + if labels is not None: |
| 51 | + loss = self.criterion(logits, labels) if labels is not None else None |
| 52 | + return PropertyPredictionOutput( |
| 53 | + loss=loss, |
| 54 | + logits=logits, |
| 55 | + ) |
| 56 | + |
| 57 | + @classmethod |
| 58 | + def from_config(cls, config_path, **kwargs): |
| 59 | + config_path = os.path.join(os.path.dirname(config_path), 'model_config.json') |
| 60 | + with open(config_path, "r", encoding="utf-8") as rf: |
| 61 | + model_config = json.load(rf) |
| 62 | + model_config['pretrained_model_dir'] = os.path.dirname(config_path) |
| 63 | + model_config.update(kwargs) |
| 64 | + return cls( |
| 65 | + pretrained_model_dir=model_config['pretrained_model_dir'], |
| 66 | + head_dropout=model_config.get("head_dropout", 0.5), |
| 67 | + init=model_config.get('init', False) |
| 68 | + ) |
| 69 | + |
| 70 | + def save_config(self, config_dir): |
| 71 | + config_path = os.path.join(config_dir, "model_config.json") |
| 72 | + with open(config_path, "w", encoding="utf-8") as wf: |
| 73 | + json.dump(self.config.to_dict(), wf, ensure_ascii=False, indent=2) |
| 74 | + self.jiuzhang.config.save_pretrained(config_dir) |
| 75 | + |
| 76 | + |
class JiuzhangForKnowledgePrediction(BaseModel):
    """Jiuzhang encoder with a hybrid flat + hierarchical knowledge classifier.

    Combines a flat multi-label linear head (over the pooled first-token
    embedding) with a hierarchical attention head (HAM, over all token
    embeddings); final logits are a convex combination of the two, weighted
    by `flat_cls_weight`.

    Args:
        pretrained_model_dir: directory holding the pretrained Jiuzhang
            checkpoint and its HuggingFace config.
        num_classes_list: class counts per level of the knowledge hierarchy.
        num_total_classes: total number of knowledge labels across all levels.
        head_dropout: dropout probability applied to encoder outputs.
        flat_cls_weight: weight of the flat head in the combined logits.
        attention_unit_size: attention width inside the HAM head.
        fc_hidden_size: fully-connected hidden size inside the HAM head.
        beta: HAM mixing hyper-parameter.
        init: if True, load pretrained weights; if False, config-only init.
    """

    def __init__(self,
                 pretrained_model_dir=None,
                 num_classes_list: List[int] = None,
                 num_total_classes: int = None,
                 head_dropout=0.5,
                 flat_cls_weight=0.5,
                 attention_unit_size=256,
                 fc_hidden_size=512,
                 beta=0.5,
                 init=True
                 ):
        super(JiuzhangForKnowledgePrediction, self).__init__()
        jiuzhang_config = JiuzhangConfig.from_pretrained(pretrained_model_dir)
        if init:
            print(f'Load Jiuzhang from checkpoint: {pretrained_model_dir}')
            # ignore_mismatched_sizes tolerates head/embedding shape drift
            # between the checkpoint and the current config.
            self.jiuzhang = JiuzhangModel.from_pretrained(pretrained_model_dir, ignore_mismatched_sizes=True)
        else:
            print(f'Load Jiuzhang from config: {pretrained_model_dir}')
            self.jiuzhang = JiuzhangModel(jiuzhang_config)
        self.hidden_size = self.jiuzhang.config.hidden_size
        self.head_dropout = head_dropout
        self.dropout = nn.Dropout(head_dropout)
        self.sigmoid = nn.Sigmoid()
        self.criterion = nn.MSELoss()
        self.flat_classifier = nn.Linear(self.hidden_size, num_total_classes)
        self.ham_classifier = HAM(
            num_classes_list=num_classes_list,
            num_total_classes=num_total_classes,
            sequence_model_hidden_size=self.jiuzhang.config.hidden_size,
            attention_unit_size=attention_unit_size,
            fc_hidden_size=fc_hidden_size,
            beta=beta,
            dropout_rate=head_dropout
        )
        self.flat_cls_weight = flat_cls_weight
        self.num_classes_list = num_classes_list
        self.num_total_classes = num_total_classes

        # Snapshot the constructor arguments (excluding self/__class__ and the
        # transient HF config object) so save_config() can round-trip them.
        self.config = {k: v for k, v in locals().items() if k not in ["self", "__class__", "jiuzhang_config"]}
        self.config['architecture'] = 'JiuzhangForKnowledgePrediction'
        self.config = PretrainedConfig.from_dict(self.config)

    def forward(self,
                input_ids=None,
                attention_mask=None,
                token_type_ids=None,
                labels=None):
        """Score all knowledge labels for each item.

        Returns a KnowledgePredictionOutput with combined logits of shape
        (batch, num_total_classes) and, when `labels` (integer knowledge-id
        indices per item) is given, the MSE loss against the multi-hot target.
        """
        outputs = self.jiuzhang(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        # Pooled first-token embedding feeds the flat head; the full token
        # sequence feeds the hierarchical attention head.
        item_embeds = outputs.last_hidden_state[:, 0, :]
        item_embeds = self.dropout(item_embeds)
        tokens_embeds = outputs.last_hidden_state
        tokens_embeds = self.dropout(tokens_embeds)
        flat_logits = self.sigmoid(self.flat_classifier(item_embeds))
        ham_outputs = self.ham_classifier(tokens_embeds)
        ham_logits = self.sigmoid(ham_outputs.scores)
        # Convex combination of the two heads.
        logits = self.flat_cls_weight * flat_logits + (1 - self.flat_cls_weight) * ham_logits
        loss = None
        if labels is not None:
            # Collapse per-item label index lists into a multi-hot target vector.
            labels = torch.sum(torch.nn.functional.one_hot(labels, num_classes=self.num_total_classes), dim=1)
            labels = labels.float()
            loss = self.criterion(logits, labels)
        return KnowledgePredictionOutput(
            loss=loss,
            logits=logits,
        )

    @classmethod
    def from_config(cls, config_path, **kwargs):
        """Rebuild the model from a saved model_config.json next to `config_path`.

        Keyword arguments override values stored in the JSON file.
        """
        config_path = os.path.join(os.path.dirname(config_path), 'model_config.json')
        with open(config_path, "r", encoding="utf-8") as rf:
            model_config = json.load(rf)
        # The checkpoint lives alongside the config file.
        model_config['pretrained_model_dir'] = os.path.dirname(config_path)
        model_config.update(kwargs)
        return cls(
            pretrained_model_dir=model_config['pretrained_model_dir'],
            head_dropout=model_config.get("head_dropout", 0.5),
            num_classes_list=model_config.get('num_classes_list'),
            num_total_classes=model_config.get('num_total_classes'),
            flat_cls_weight=model_config.get('flat_cls_weight', 0.5),
            attention_unit_size=model_config.get('attention_unit_size', 256),
            fc_hidden_size=model_config.get('fc_hidden_size', 512),
            beta=model_config.get('beta', 0.5),
            init=model_config.get('init', False)
        )

    def save_config(self, config_dir):
        """Write model_config.json plus the underlying HF config to `config_dir`."""
        config_path = os.path.join(config_dir, "model_config.json")
        with open(config_path, "w", encoding="utf-8") as wf:
            json.dump(self.config.to_dict(), wf, ensure_ascii=False, indent=2)
        self.jiuzhang.config.save_pretrained(config_dir)
0 commit comments