\n"," "]},"metadata":{}}]},{"cell_type":"code","source":["from datasets import load_dataset\n","\n","# KLUE NER 데이터셋 로드\n","dataset = load_dataset(\"klue\", \"ner\")\n","\n","# 태그 리스트 확인\n","tag_list = dataset['train'].features['ner_tags'].feature.names\n","print(tag_list)\n","\n","# tag2id 및 id2tag 사전 생성\n","tag2id = {tag: id for id, tag in enumerate(tag_list)}\n","id2tag = {id: tag for tag, id in tag2id.items()}\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"KgAZf6eNjYEc","executionInfo":{"status":"ok","timestamp":1712135972978,"user_tz":-540,"elapsed":4257,"user":{"displayName":"송문선","userId":"02769911744591628940"}},"outputId":"85692f5c-d538-46f5-ea16-0a616dd3c8fa"},"execution_count":29,"outputs":[{"output_type":"stream","name":"stdout","text":["['B-DT', 'I-DT', 'B-LC', 'I-LC', 'B-OG', 'I-OG', 'B-PS', 'I-PS', 'B-QT', 'I-QT', 'B-TI', 'I-TI', 'O']\n"]}]},{"cell_type":"code","source":["from transformers import BertForTokenClassification, BertTokenizerFast, Trainer, TrainingArguments\n","from kobert_transformers import get_kobert_model, get_tokenizer\n","\n","checkpoint_path = \"/content/drive/MyDrive/capstone24-35/capstone-2024-35/Data Extract/model/checkpoint-2250\"\n","model = BertForTokenClassification.from_pretrained(checkpoint_path, num_labels=13) # num_labels는 데이터셋의 라벨 수에 맞춰 조정\n","tokenizer = KoBERTTokenizer.from_pretrained('skt/kobert-base-v1')\n"],"metadata":{"id":"VTwHK5A8oXe1","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1712135979928,"user_tz":-540,"elapsed":3379,"user":{"displayName":"송문선","userId":"02769911744591628940"}},"outputId":"c4a6819a-4c3d-4ad2-e3c8-7a57b5a9bc33"},"execution_count":30,"outputs":[{"output_type":"stream","name":"stderr","text":["The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. \n","The tokenizer class you load from this checkpoint is 'XLNetTokenizer'. 
\n","The class this function is called from is 'KoBERTTokenizer'.\n"]}]},{"cell_type":"code","source":["from transformers import TrainingArguments, Trainer\n","\n","training_args = TrainingArguments(\n"," output_dir=\"/content/drive/MyDrive/capstone24-35/capstone-2024-35/Data Extract/model_finetuned\", # 추가 학습 후 모델 저장 경로\n"," num_train_epochs=3, # 추가 훈련할 에포크 수\n"," per_device_train_batch_size=4,\n"," per_device_eval_batch_size=4,\n"," warmup_steps=500,\n"," weight_decay=0.01,\n"," evaluation_strategy='epoch',\n"," logging_dir=\"./logs_retrained\",\n"," save_strategy=\"epoch\",\n"," load_best_model_at_end=True,\n"," save_total_limit=3\n",")\n","\n","trainer = Trainer(\n"," model=model,\n"," args=training_args,\n"," train_dataset=train_dataset,\n"," eval_dataset=eval_dataset\n",")\n"],"metadata":{"id":"34Ew0pzKoYv1","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1712136011143,"user_tz":-540,"elapsed":536,"user":{"displayName":"송문선","userId":"02769911744591628940"}},"outputId":"80084a35-ebd7-47de-c1dc-b1b543193700"},"execution_count":33,"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.10/dist-packages/accelerate/accelerator.py:432: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches', 'even_batches', 'use_seedable_sampler']). Please pass an `accelerate.DataLoaderConfiguration` instead: \n","dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)\n"," warnings.warn(\n"]}]},{"cell_type":"code","source":["trainer.train()\n","trainer.evaluate()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":166},"id":"cJQnBcFUYrDZ","outputId":"c6dc579e-a1cf-497d-e879-d10e9115c45f"},"execution_count":null,"outputs":[{"data":{"text/html":["\n","
"]},"metadata":{}}]}]}
\ No newline at end of file
diff --git a/DataExtract/colab/data_extract.ipynb b/DataExtract/colab/data_extract.ipynb
new file mode 100644
index 0000000000..305fb8edfd
--- /dev/null
+++ b/DataExtract/colab/data_extract.ipynb
@@ -0,0 +1 @@
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyPiD2JOhY57EgkwnhwjNrS/"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"widgets":{"application/vnd.jupyter.widget-state+json":{"798850727cc8425eaf6175afb3775814":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_bb436f42443c4e5fa41313748cb3095e","IPY_MODEL_4186bf16685b45c097e9439df8a18ece","IPY_MODEL_f883a7de6e414126be07c78cf097c0af"],"layout":"IPY_MODEL_ddab447cf8734f6bb37a9aef7610b152"}},"bb436f42443c4e5fa41313748cb3095e":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_c19bac5165aa45dd8b1c461be7adf1e0","placeholder":"","style":"IPY_MODEL_b9ea442fa8cc4b828b24c2b9d718c111","value":"Downloading readme: 
100%"}},"4186bf16685b45c097e9439df8a18ece":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_3ed98750e09849c09f9a71cd33e295d4","max":22458,"min":0,"orientation":"horizontal","style":"IPY_MODEL_d5c7aa1db7b4428f836d55fdabb0fbf3","value":22458}},"f883a7de6e414126be07c78cf097c0af":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_beb045d4e94d48b0bc5dea7af46f1abc","placeholder":"","style":"IPY_MODEL_de70bf35152d4460b3b30c3be61ba143","value":" 22.5k/22.5k [00:00<00:00, 
911kB/s]"}},"ddab447cf8734f6bb37a9aef7610b152":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c19bac5165aa45dd8b1c461be7adf1e0":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"o
verflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b9ea442fa8cc4b828b24c2b9d718c111":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"3ed98750e09849c09f9a71cd33e295d4":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d5c7aa1db7b4428f836d55fdabb0fbf3":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"beb045d4e94d48b0bc5dea7af46f1abc":{"model_mo
dule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"de70bf35152d4460b3b30c3be61ba143":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"57a73bef422341d3ac8eb092f6aecb14":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_9178609f8f464cf489f4bc546900bfab","IPY_MODEL_39a9e423c6e24ccdafb06e38856b3d88","IPY_MODEL_4eb841c6834c432db1d322d05e90d236"],"layout":"IPY_MODEL_51bacf74bbf846e288bc306a3149eb53"}
},"9178609f8f464cf489f4bc546900bfab":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4ae071d616604e4ab886bf14731b834a","placeholder":"","style":"IPY_MODEL_d138f84ba02d4aba8571fba0c6e33eff","value":"Downloading data: 100%"}},"39a9e423c6e24ccdafb06e38856b3d88":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_906967413214474a8eb3854cdb853446","max":4209983,"min":0,"orientation":"horizontal","style":"IPY_MODEL_1f8da36838184aab9a16e608f983c2a6","value":4209983}},"4eb841c6834c432db1d322d05e90d236":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e4cffb6a102544409cdedb2cc9032b3b","placeholder":"","style":"IPY_MODEL_1f755d4f0ca94c4ca3504b00fe812db8","value":" 4.21M/4.21M [00:00<00:00, 
11.7MB/s]"}},"51bacf74bbf846e288bc306a3149eb53":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4ae071d616604e4ab886bf14731b834a":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d138f84ba02d4aba8571fba0c6e33eff":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"906967413214474a8eb3854cdb853446":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1f8da36838184aab9a16e608f983c2a6":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"e4cffb6a102544409cdedb2cc9032b3b":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1f755d4f0ca94c4ca3504b00fe812db8":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ba7a1184512d48be8125c18fa34182fb":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_2f0e8b41981b45ab8184601bc8ce43bf","IPY_MODEL_fd7d5a26155c432980e8d8af4d308b71","IPY_MODEL_5247ebeae5ef463c91820ae75753a995"],"layout":"IPY_MODEL_fea1de75da2d4b44920d34c02c0fc58d"
}},"2f0e8b41981b45ab8184601bc8ce43bf":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_9c7322d7f0504713b95ed0fa0921b008","placeholder":"","style":"IPY_MODEL_67689efc6c244fb58e97a39f16fbb356","value":"Downloading data: 100%"}},"fd7d5a26155c432980e8d8af4d308b71":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_1bb81d8545ea4daea22b21d5d6703a23","max":1055904,"min":0,"orientation":"horizontal","style":"IPY_MODEL_c48406f609064d7cb781bdcb431aa3ed","value":1055904}},"5247ebeae5ef463c91820ae75753a995":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_76525f9e332d46308e9a3f7c586699c5","placeholder":"","style":"IPY_MODEL_57b16234f4034e498e8f5ac4d59e67ca","value":" 1.06M/1.06M [00:00<00:00, 
4.22MB/s]"}},"fea1de75da2d4b44920d34c02c0fc58d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9c7322d7f0504713b95ed0fa0921b008":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"67689efc6c244fb58e97a39f16fbb356":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1bb81d8545ea4daea22b21d5d6703a23":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c48406f609064d7cb781bdcb431aa3ed":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"76525f9e332d46308e9a3f7c586699c5":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"57b16234f4034e498e8f5ac4d59e67ca":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"cceb1bc4b4d24816ae421f24b5b96b98":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_071081a597464f55812f505e531924ef","IPY_MODEL_a0972d998d6545ec8e6b30a9edcb1736","IPY_MODEL_d9c9cf6885e8402d84a6ef6dee35ac59"],"layout":"IPY_MODEL_0575b30f4da84f379bd2cc5d5f3190ea"
}},"071081a597464f55812f505e531924ef":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_d158415735874c4e855aff4b4ac284b5","placeholder":"","style":"IPY_MODEL_5662d8a14e1949f6a821f7d013ab600c","value":"Generating train split: 100%"}},"a0972d998d6545ec8e6b30a9edcb1736":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_a8322aff5d2744509a435cfa7f29d574","max":21008,"min":0,"orientation":"horizontal","style":"IPY_MODEL_d9d585df8d914b94b2455a445e846dbd","value":21008}},"d9c9cf6885e8402d84a6ef6dee35ac59":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_26bdcb86168e4c24bd80cfc44085ebba","placeholder":"","style":"IPY_MODEL_ab9af3353c96402c9088984f06ad720d","value":" 21008/21008 [00:00<00:00, 62001.43 
examples/s]"}},"0575b30f4da84f379bd2cc5d5f3190ea":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d158415735874c4e855aff4b4ac284b5":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null
,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5662d8a14e1949f6a821f7d013ab600c":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a8322aff5d2744509a435cfa7f29d574":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d9d585df8d914b94b2455a445e846dbd":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"26bdcb86168e4c24bd80cfc44085ebba":{"model
_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ab9af3353c96402c9088984f06ad720d":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"08199005375444a781594d286578be46":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_6a3c8dfbd82d49e486cd079ffb2573bd","IPY_MODEL_eae783c6f15a44fe85e6ae802787c06c","IPY_MODEL_2c014f7b70614bd0bdbaad533b91733b"],"layout":"IPY_MODEL_d9159920c64a4f1ba231c07de4e73e8
2"}},"6a3c8dfbd82d49e486cd079ffb2573bd":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_67a9d76161274bcdaacefbf6283b7018","placeholder":"","style":"IPY_MODEL_b12f9a5275144f4c8ccbf7b072349443","value":"Generating validation split: 100%"}},"eae783c6f15a44fe85e6ae802787c06c":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_eb831d3b82b14cb0a231036fd084fc96","max":5000,"min":0,"orientation":"horizontal","style":"IPY_MODEL_112f612f8f7a48f595e223da1f9ee4e0","value":5000}},"2c014f7b70614bd0bdbaad533b91733b":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_00d658a9dce34fc28b26263bb5154eb9","placeholder":"","style":"IPY_MODEL_6484a68609014d83939253a0436268e7","value":" 5000/5000 [00:00<00:00, 49023.05 
examples/s]"}},"d9159920c64a4f1ba231c07de4e73e82":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"67a9d76161274bcdaacefbf6283b7018":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null
,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b12f9a5275144f4c8ccbf7b072349443":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"eb831d3b82b14cb0a231036fd084fc96":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"112f612f8f7a48f595e223da1f9ee4e0":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"00d658a9dce34fc28b26263bb5154eb9":{"model
_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6484a68609014d83939253a0436268e7":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"5dc05eaa6fe34306ae2796d69d16fa68":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_97705820fa3b49ec8efe7bbf014b662e","IPY_MODEL_eb7fb0ae872f484bbf518397d4c8dcdf","IPY_MODEL_83283ced534a465b9074b3fe75a8b08b"],"layout":"IPY_MODEL_dfa410ba3b724894ab59956a72d9829
5"}},"97705820fa3b49ec8efe7bbf014b662e":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_63e5abaded4342519019f1de5770a076","placeholder":"","style":"IPY_MODEL_2edd41dcd74e440b980099a3d559310a","value":"tokenizer_config.json: 100%"}},"eb7fb0ae872f484bbf518397d4c8dcdf":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_67d2af8067d4443ba22c11afc38671f7","max":432,"min":0,"orientation":"horizontal","style":"IPY_MODEL_59d0ce4816644730b431bb7285df89f1","value":432}},"83283ced534a465b9074b3fe75a8b08b":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e18329aa4583426ab72de02174098b18","placeholder":"","style":"IPY_MODEL_b7698e5108764415b8f174a4c2abf82b","value":" 432/432 [00:00<00:00, 
20.4kB/s]"}},"dfa410ba3b724894ab59956a72d98295":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"63e5abaded4342519019f1de5770a076":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2edd41dcd74e440b980099a3d559310a":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"67d2af8067d4443ba22c11afc38671f7":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"59d0ce4816644730b431bb7285df89f1":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"e18329aa4583426ab72de02174098b18":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b7698e5108764415b8f174a4c2abf82b":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"9ccbbdd69f514c5fb754fa943f6cf1ad":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_959e73767aff4a0caf8472b13fcb1a4b","IPY_MODEL_ef299b638e534afe9ed650a78f85ea59","IPY_MODEL_2288b0eddbad47e597546036aafbd675"],"layout":"IPY_MODEL_2fb1159d0c42467383adcf891620ca9d"
}},"959e73767aff4a0caf8472b13fcb1a4b":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_de6771853ca843e1b55eaef438e717bb","placeholder":"","style":"IPY_MODEL_6fad117f7b04450aa71c8e4303938c37","value":"spiece.model: 100%"}},"ef299b638e534afe9ed650a78f85ea59":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_6c1cf9c5f0d7479daa5375ad513bab59","max":371427,"min":0,"orientation":"horizontal","style":"IPY_MODEL_e0dfec409b014823b611f24c448d1185","value":371427}},"2288b0eddbad47e597546036aafbd675":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_06cceeec756945b48afc750cf2e9bd8b","placeholder":"","style":"IPY_MODEL_4f922444e8624286b81a2d2f1a7acf77","value":" 371k/371k [00:00<00:00, 
4.23MB/s]"}},"2fb1159d0c42467383adcf891620ca9d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"de6771853ca843e1b55eaef438e717bb":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6fad117f7b04450aa71c8e4303938c37":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"6c1cf9c5f0d7479daa5375ad513bab59":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e0dfec409b014823b611f24c448d1185":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"06cceeec756945b48afc750cf2e9bd8b":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4f922444e8624286b81a2d2f1a7acf77":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e53a751d94134087a276e9bf71ca5716":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_f0078b915b4743358760179db2899105","IPY_MODEL_b3d21512f52847d6bb56dd4eae3aeb5f","IPY_MODEL_b0f27f0ea5f442b791ae7fa2c11f1cb3"],"layout":"IPY_MODEL_79d37962b77c4e62acf6129ca71e9fca"
}},"f0078b915b4743358760179db2899105":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_098d8ab617ab4da88070a386136163af","placeholder":"","style":"IPY_MODEL_39aa786ad32b489598f23409c3756cfc","value":"special_tokens_map.json: 100%"}},"b3d21512f52847d6bb56dd4eae3aeb5f":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_c6787d1bd82944bebf94d4ca553d6dc8","max":244,"min":0,"orientation":"horizontal","style":"IPY_MODEL_b39fcdc03fef40b8be066a9ee3b7da43","value":244}},"b0f27f0ea5f442b791ae7fa2c11f1cb3":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f1bc4da9f18747bfa1d632916bdd252f","placeholder":"","style":"IPY_MODEL_6129970c0f3e4211a8c0b9be920b2e2e","value":" 244/244 [00:00<00:00, 
8.27kB/s]"}},"79d37962b77c4e62acf6129ca71e9fca":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"098d8ab617ab4da88070a386136163af":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"39aa786ad32b489598f23409c3756cfc":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"c6787d1bd82944bebf94d4ca553d6dc8":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b39fcdc03fef40b8be066a9ee3b7da43":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"f1bc4da9f18747bfa1d632916bdd252f":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6129970c0f3e4211a8c0b9be920b2e2e":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"cells":[{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UAeFpCJkvYvZ","executionInfo":{"status":"ok","timestamp":1712127603483,"user_tz":-540,"elapsed":23898,"user":{"displayName":"송문선","userId":"02769911744591628940"}},"outputId":"9f30d1ab-7d7f-4443-f19c-d8c73e9274e5"},"outputs":[{"output_type":"stream","name":"stdout","text":["Mounted at /content/drive\n"]}],"source":["from google.colab import drive\n","drive.mount('/content/drive')"]},{"cell_type":"code","source":["cd 
/content/drive/MyDrive/capstone24-35/capstone-2024-35/Data Extract/colab"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"9ilkxEuVvfzZ","executionInfo":{"status":"ok","timestamp":1712127607671,"user_tz":-540,"elapsed":3,"user":{"displayName":"송문선","userId":"02769911744591628940"}},"outputId":"22442f71-a386-4140-cb0a-6fa0bba8615e"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["/content/drive/MyDrive/capstone24-35/capstone-2024-35/Data Extract/colab\n"]}]},{"cell_type":"code","source":["!pip install torch transformers datasets\n","!pip install git+https://github.com/SKTBrain/KoBERT.git@master\n","!pip install 'git+https://github.com/SKTBrain/KoBERT.git#egg=kobert_tokenizer&subdirectory=kobert_hf'\n","!pip install kobert-transformers\n","!pip install transformers\n","!pip install torch\n","!pip install datasets\n","!pip install seqeval # NER 평가를 위한 라이브러리\n","!pip install transformers[torch] -U\n","\n","from transformers import BertTokenizerFast, BertForTokenClassification, AdamW\n","from transformers import Trainer, TrainingArguments\n","from datasets import load_dataset, load_metric\n","import torch\n","from torch.utils.data import DataLoader\n","from torch.nn.utils.rnn import pad_sequence\n","import numpy as np\n","from kobert_tokenizer import KoBERTTokenizer\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"trszjdkevhPk","executionInfo":{"status":"ok","timestamp":1712127787050,"user_tz":-540,"elapsed":176299,"user":{"displayName":"송문선","userId":"02769911744591628940"}},"outputId":"45b0d4be-10f2-4444-f7a2-af602251400c"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.2.1+cu121)\n","Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.38.2)\n","Collecting datasets\n"," Downloading datasets-2.18.0-py3-none-any.whl (510 kB)\n","\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m510.5/510.5 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.13.3)\n","Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch) (4.10.0)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.12)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.2.1)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.3)\n","Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch) (2023.6.0)\n","Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)\n"," Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m23.7/23.7 MB\u001b[0m \u001b[31m38.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)\n"," Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m823.6/823.6 kB\u001b[0m \u001b[31m68.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)\n"," Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.1/14.1 MB\u001b[0m \u001b[31m67.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting nvidia-cudnn-cu12==8.9.2.26 (from torch)\n"," Downloading nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n","\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m731.7/731.7 MB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting nvidia-cublas-cu12==12.1.3.1 (from torch)\n"," Downloading nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m410.6/410.6 MB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting nvidia-cufft-cu12==11.0.2.54 (from torch)\n"," Downloading nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.6/121.6 MB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting nvidia-curand-cu12==10.3.2.106 (from torch)\n"," Downloading nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.5/56.5 MB\u001b[0m \u001b[31m13.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting nvidia-cusolver-cu12==11.4.5.107 (from torch)\n"," Downloading nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m124.2/124.2 MB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting nvidia-cusparse-cu12==12.1.0.106 (from torch)\n"," Downloading nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m196.0/196.0 MB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting nvidia-nccl-cu12==2.19.3 (from torch)\n"," Downloading nvidia_nccl_cu12-2.19.3-py3-none-manylinux1_x86_64.whl (166.0 MB)\n","\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m166.0/166.0 MB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting nvidia-nvtx-cu12==12.1.105 (from torch)\n"," Downloading nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m99.1/99.1 kB\u001b[0m \u001b[31m15.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: triton==2.2.0 in /usr/local/lib/python3.10/dist-packages (from torch) (2.2.0)\n","Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch)\n"," Downloading nvidia_nvjitlink_cu12-12.4.99-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m74.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.20.3)\n","Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.25.2)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (24.0)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2023.12.25)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.31.0)\n","Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.15.2)\n","Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.2)\n","Requirement already satisfied: tqdm>=4.27 in 
/usr/local/lib/python3.10/dist-packages (from transformers) (4.66.2)\n","Requirement already satisfied: pyarrow>=12.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (14.0.2)\n","Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets) (0.6)\n","Collecting dill<0.3.9,>=0.3.0 (from datasets)\n"," Downloading dill-0.3.8-py3-none-any.whl (116 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m19.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (1.5.3)\n","Collecting xxhash (from datasets)\n"," Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m23.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting multiprocess (from datasets)\n"," Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m21.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.9.3)\n","Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n","Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.2.0)\n","Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.1)\n","Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.0.5)\n","Requirement already satisfied: yarl<2.0,>=1.0 in 
/usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.9.4)\n","Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.3.2)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.6)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.7)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2024.2.2)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.5)\n","Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n","Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2023.4)\n","Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.16.0)\n","Installing collected packages: xxhash, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, dill, nvidia-cusparse-cu12, nvidia-cudnn-cu12, multiprocess, nvidia-cusolver-cu12, datasets\n","Successfully installed datasets-2.18.0 dill-0.3.8 multiprocess-0.70.16 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 
nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.19.3 nvidia-nvjitlink-cu12-12.4.99 nvidia-nvtx-cu12-12.1.105 xxhash-3.4.1\n","Collecting git+https://github.com/SKTBrain/KoBERT.git@master\n"," Cloning https://github.com/SKTBrain/KoBERT.git (to revision master) to /tmp/pip-req-build-e07zcp5e\n"," Running command git clone --filter=blob:none --quiet https://github.com/SKTBrain/KoBERT.git /tmp/pip-req-build-e07zcp5e\n"," Resolved https://github.com/SKTBrain/KoBERT.git to commit 47a69af87928fc24e20f571fe10c3cc9dd9af9a3\n"," Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n","Collecting boto3<=1.15.18 (from kobert==0.2.3)\n"," Downloading boto3-1.15.18-py2.py3-none-any.whl (129 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.1/129.1 kB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting gluonnlp<=0.10.0,>=0.6.0 (from kobert==0.2.3)\n"," Downloading gluonnlp-0.10.0.tar.gz (344 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m344.5/344.5 kB\u001b[0m \u001b[31m13.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n","Collecting mxnet<=1.7.0.post2,>=1.4.0 (from kobert==0.2.3)\n"," Downloading mxnet-1.7.0.post2-py2.py3-none-manylinux2014_x86_64.whl (54.7 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.7/54.7 MB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hINFO: pip is looking at multiple versions of kobert to determine which version is compatible with other requirements. 
This could take a while.\n","\u001b[31mERROR: Could not find a version that satisfies the requirement onnxruntime<=1.8.0,==1.8.0 (from kobert) (from versions: 1.12.0, 1.12.1, 1.13.1, 1.14.0, 1.14.1, 1.15.0, 1.15.1, 1.16.0, 1.16.1, 1.16.2, 1.16.3, 1.17.0, 1.17.1)\u001b[0m\u001b[31m\n","\u001b[0m\u001b[31mERROR: No matching distribution found for onnxruntime<=1.8.0,==1.8.0\u001b[0m\u001b[31m\n","\u001b[0mCollecting kobert_tokenizer\n"," Cloning https://github.com/SKTBrain/KoBERT.git to /tmp/pip-install-rpi10j3p/kobert-tokenizer_b12cf91fc69e4ef1a73ede800b0f20a6\n"," Running command git clone --filter=blob:none --quiet https://github.com/SKTBrain/KoBERT.git /tmp/pip-install-rpi10j3p/kobert-tokenizer_b12cf91fc69e4ef1a73ede800b0f20a6\n"," Resolved https://github.com/SKTBrain/KoBERT.git to commit 47a69af87928fc24e20f571fe10c3cc9dd9af9a3\n"," Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n","Building wheels for collected packages: kobert_tokenizer\n"," Building wheel for kobert_tokenizer (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n"," Created wheel for kobert_tokenizer: filename=kobert_tokenizer-0.1-py3-none-any.whl size=4633 sha256=54e461c89c47ee7738d576d87845ddea77e57e962b315bf7129113f826c386bb\n"," Stored in directory: /tmp/pip-ephem-wheel-cache-hpbe4al4/wheels/e9/1a/3f/a864970e8a169c176befa3c4a1e07aa612f69195907a4045fe\n","Successfully built kobert_tokenizer\n","Installing collected packages: kobert_tokenizer\n","Successfully installed kobert_tokenizer-0.1\n","Collecting kobert-transformers\n"," Downloading kobert_transformers-0.5.1-py3-none-any.whl (12 kB)\n","Requirement already satisfied: torch>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from kobert-transformers) (2.2.1+cu121)\n","Requirement already satisfied: transformers<5,>=3 in /usr/local/lib/python3.10/dist-packages (from kobert-transformers) (4.38.2)\n","Requirement already satisfied: sentencepiece>=0.1.91 in /usr/local/lib/python3.10/dist-packages (from kobert-transformers) (0.1.99)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.1.0->kobert-transformers) (3.13.3)\n","Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.1.0->kobert-transformers) (4.10.0)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.1.0->kobert-transformers) (1.12)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.1.0->kobert-transformers) (3.2.1)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.1.0->kobert-transformers) (3.1.3)\n","Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch>=1.1.0->kobert-transformers) (2023.6.0)\n","Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch>=1.1.0->kobert-transformers) (12.1.105)\n","Requirement already satisfied: 
nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch>=1.1.0->kobert-transformers) (12.1.105)\n","Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch>=1.1.0->kobert-transformers) (12.1.105)\n","Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.10/dist-packages (from torch>=1.1.0->kobert-transformers) (8.9.2.26)\n","Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages (from torch>=1.1.0->kobert-transformers) (12.1.3.1)\n","Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from torch>=1.1.0->kobert-transformers) (11.0.2.54)\n","Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/dist-packages (from torch>=1.1.0->kobert-transformers) (10.3.2.106)\n","Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch>=1.1.0->kobert-transformers) (11.4.5.107)\n","Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch>=1.1.0->kobert-transformers) (12.1.0.106)\n","Requirement already satisfied: nvidia-nccl-cu12==2.19.3 in /usr/local/lib/python3.10/dist-packages (from torch>=1.1.0->kobert-transformers) (2.19.3)\n","Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch>=1.1.0->kobert-transformers) (12.1.105)\n","Requirement already satisfied: triton==2.2.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.1.0->kobert-transformers) (2.2.0)\n","Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.1.0->kobert-transformers) (12.4.99)\n","Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in 
/usr/local/lib/python3.10/dist-packages (from transformers<5,>=3->kobert-transformers) (0.20.3)\n","Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers<5,>=3->kobert-transformers) (1.25.2)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers<5,>=3->kobert-transformers) (24.0)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers<5,>=3->kobert-transformers) (6.0.1)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers<5,>=3->kobert-transformers) (2023.12.25)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers<5,>=3->kobert-transformers) (2.31.0)\n","Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers<5,>=3->kobert-transformers) (0.15.2)\n","Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers<5,>=3->kobert-transformers) (0.4.2)\n","Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers<5,>=3->kobert-transformers) (4.66.2)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.1.0->kobert-transformers) (2.1.5)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers<5,>=3->kobert-transformers) (3.3.2)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers<5,>=3->kobert-transformers) (3.6)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers<5,>=3->kobert-transformers) (2.0.7)\n","Requirement already satisfied: certifi>=2017.4.17 in 
/usr/local/lib/python3.10/dist-packages (from requests->transformers<5,>=3->kobert-transformers) (2024.2.2)\n","Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.1.0->kobert-transformers) (1.3.0)\n","Installing collected packages: kobert-transformers\n","Successfully installed kobert-transformers-0.5.1\n","Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.38.2)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.13.3)\n","Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.20.3)\n","Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.25.2)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (24.0)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2023.12.25)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.31.0)\n","Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.15.2)\n","Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.2)\n","Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.2)\n","Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers) (2023.6.0)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from 
huggingface-hub<1.0,>=0.19.3->transformers) (4.10.0)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.3.2)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.6)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.7)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2024.2.2)\n","Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.2.1+cu121)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.13.3)\n","Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch) (4.10.0)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.12)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.2.1)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.3)\n","Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch) (2023.6.0)\n","Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.105)\n","Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.105)\n","Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.105)\n","Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.10/dist-packages (from torch) (8.9.2.26)\n","Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages 
(from torch) (12.1.3.1)\n","Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from torch) (11.0.2.54)\n","Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/dist-packages (from torch) (10.3.2.106)\n","Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch) (11.4.5.107)\n","Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.0.106)\n","Requirement already satisfied: nvidia-nccl-cu12==2.19.3 in /usr/local/lib/python3.10/dist-packages (from torch) (2.19.3)\n","Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.105)\n","Requirement already satisfied: triton==2.2.0 in /usr/local/lib/python3.10/dist-packages (from torch) (2.2.0)\n","Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch) (12.4.99)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.5)\n","Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n","Requirement already satisfied: datasets in /usr/local/lib/python3.10/dist-packages (2.18.0)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets) (3.13.3)\n","Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.25.2)\n","Requirement already satisfied: pyarrow>=12.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (14.0.2)\n","Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets) (0.6)\n","Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from 
datasets) (0.3.8)\n","Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (1.5.3)\n","Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.31.0)\n","Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.66.2)\n","Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets) (3.4.1)\n","Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/dist-packages (from datasets) (0.70.16)\n","Requirement already satisfied: fsspec[http]<=2024.2.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2023.6.0)\n","Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.9.3)\n","Requirement already satisfied: huggingface-hub>=0.19.4 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.20.3)\n","Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (24.0)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.1)\n","Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n","Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.2.0)\n","Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.1)\n","Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.0.5)\n","Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.9.4)\n","Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n","Requirement already satisfied: 
typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.4->datasets) (4.10.0)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.3.2)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.6)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2.0.7)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2024.2.2)\n","Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n","Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2023.4)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.16.0)\n","Collecting seqeval\n"," Downloading seqeval-1.2.2.tar.gz (43 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.6/43.6 kB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n","Requirement already satisfied: numpy>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from seqeval) (1.25.2)\n","Requirement already satisfied: scikit-learn>=0.21.3 in /usr/local/lib/python3.10/dist-packages (from seqeval) (1.2.2)\n","Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.11.4)\n","Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.3.2)\n","Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (3.4.0)\n","Building wheels for collected packages: seqeval\n"," Building wheel for seqeval (setup.py) ... \u001b[?25l\u001b[?25hdone\n"," Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16161 sha256=5748df05d23b1ecdcbd2c192b4577fceaf9e13f4c2be41b6e8ae6fae6ba15210\n"," Stored in directory: /root/.cache/pip/wheels/1a/67/4a/ad4082dd7dfc30f2abfe4d80a2ed5926a506eb8a972b4767fa\n","Successfully built seqeval\n","Installing collected packages: seqeval\n","Successfully installed seqeval-1.2.2\n","Requirement already satisfied: transformers[torch] in /usr/local/lib/python3.10/dist-packages (4.38.2)\n","Collecting transformers[torch]\n"," Downloading transformers-4.39.3-py3-none-any.whl (8.8 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.8/8.8 MB\u001b[0m \u001b[31m46.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (3.13.3)\n","Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.20.3)\n","Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (1.25.2)\n","Requirement already satisfied: 
packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (24.0)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (6.0.1)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (2023.12.25)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (2.31.0)\n","Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.15.2)\n","Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.4.2)\n","Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (4.66.2)\n","Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (2.2.1+cu121)\n","Collecting accelerate>=0.21.0 (from transformers[torch])\n"," Downloading accelerate-0.28.0-py3-none-any.whl (290 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m290.1/290.1 kB\u001b[0m \u001b[31m38.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.21.0->transformers[torch]) (5.9.5)\n","Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers[torch]) (2023.6.0)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers[torch]) (4.10.0)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (1.12)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from 
torch->transformers[torch]) (3.2.1)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (3.1.3)\n","Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (12.1.105)\n","Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (12.1.105)\n","Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (12.1.105)\n","Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (8.9.2.26)\n","Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (12.1.3.1)\n","Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (11.0.2.54)\n","Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (10.3.2.106)\n","Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (11.4.5.107)\n","Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (12.1.0.106)\n","Requirement already satisfied: nvidia-nccl-cu12==2.19.3 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (2.19.3)\n","Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (12.1.105)\n","Requirement already satisfied: triton==2.2.0 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) 
(2.2.0)\n","Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch->transformers[torch]) (12.4.99)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (3.3.2)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (3.6)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (2.0.7)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (2024.2.2)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->transformers[torch]) (2.1.5)\n","Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->transformers[torch]) (1.3.0)\n","Installing collected packages: transformers, accelerate\n"," Attempting uninstall: transformers\n"," Found existing installation: transformers 4.38.2\n"," Uninstalling transformers-4.38.2:\n"," Successfully uninstalled transformers-4.38.2\n","Successfully installed accelerate-0.28.0 transformers-4.39.3\n"]}]},{"cell_type":"code","source":["from datasets import load_dataset\n","\n","# KLUE NER 데이터셋 로드\n","dataset = load_dataset(\"klue\", \"ner\")\n","\n","# 태그 리스트 확인\n","tag_list = dataset['train'].features['ner_tags'].feature.names\n","print(tag_list)\n","\n","# tag2id 및 id2tag 사전 생성\n","tag2id = {tag: id for id, tag in enumerate(tag_list)}\n","id2tag = {id: tag for tag, id in 
tag2id.items()}\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":322,"referenced_widgets":["798850727cc8425eaf6175afb3775814","bb436f42443c4e5fa41313748cb3095e","4186bf16685b45c097e9439df8a18ece","f883a7de6e414126be07c78cf097c0af","ddab447cf8734f6bb37a9aef7610b152","c19bac5165aa45dd8b1c461be7adf1e0","b9ea442fa8cc4b828b24c2b9d718c111","3ed98750e09849c09f9a71cd33e295d4","d5c7aa1db7b4428f836d55fdabb0fbf3","beb045d4e94d48b0bc5dea7af46f1abc","de70bf35152d4460b3b30c3be61ba143","57a73bef422341d3ac8eb092f6aecb14","9178609f8f464cf489f4bc546900bfab","39a9e423c6e24ccdafb06e38856b3d88","4eb841c6834c432db1d322d05e90d236","51bacf74bbf846e288bc306a3149eb53","4ae071d616604e4ab886bf14731b834a","d138f84ba02d4aba8571fba0c6e33eff","906967413214474a8eb3854cdb853446","1f8da36838184aab9a16e608f983c2a6","e4cffb6a102544409cdedb2cc9032b3b","1f755d4f0ca94c4ca3504b00fe812db8","ba7a1184512d48be8125c18fa34182fb","2f0e8b41981b45ab8184601bc8ce43bf","fd7d5a26155c432980e8d8af4d308b71","5247ebeae5ef463c91820ae75753a995","fea1de75da2d4b44920d34c02c0fc58d","9c7322d7f0504713b95ed0fa0921b008","67689efc6c244fb58e97a39f16fbb356","1bb81d8545ea4daea22b21d5d6703a23","c48406f609064d7cb781bdcb431aa3ed","76525f9e332d46308e9a3f7c586699c5","57b16234f4034e498e8f5ac4d59e67ca","cceb1bc4b4d24816ae421f24b5b96b98","071081a597464f55812f505e531924ef","a0972d998d6545ec8e6b30a9edcb1736","d9c9cf6885e8402d84a6ef6dee35ac59","0575b30f4da84f379bd2cc5d5f3190ea","d158415735874c4e855aff4b4ac284b5","5662d8a14e1949f6a821f7d013ab600c","a8322aff5d2744509a435cfa7f29d574","d9d585df8d914b94b2455a445e846dbd","26bdcb86168e4c24bd80cfc44085ebba","ab9af3353c96402c9088984f06ad720d","08199005375444a781594d286578be46","6a3c8dfbd82d49e486cd079ffb2573bd","eae783c6f15a44fe85e6ae802787c06c","2c014f7b70614bd0bdbaad533b91733b","d9159920c64a4f1ba231c07de4e73e82","67a9d76161274bcdaacefbf6283b7018","b12f9a5275144f4c8ccbf7b072349443","eb831d3b82b14cb0a231036fd084fc96","112f612f8f7a48f595e223da1f9ee4e0","00d658a9dce34fc28b26263bb5154eb
9","6484a68609014d83939253a0436268e7"]},"id":"kWjggPJuvt1z","executionInfo":{"status":"ok","timestamp":1712127859542,"user_tz":-540,"elapsed":10518,"user":{"displayName":"송문선","userId":"02769911744591628940"}},"outputId":"b0308591-ac3f-4ef0-f71f-9d2bc481ce96"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n","The secret `HF_TOKEN` does not exist in your Colab secrets.\n","To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n","You will be able to reuse this secret in all of your notebooks.\n","Please note that authentication is recommended but still optional to access public models or datasets.\n"," warnings.warn(\n"]},{"output_type":"display_data","data":{"text/plain":["Downloading readme: 0%| | 0.00/22.5k [00:00, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"798850727cc8425eaf6175afb3775814"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["Downloading data: 0%| | 0.00/4.21M [00:00, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"57a73bef422341d3ac8eb092f6aecb14"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["Downloading data: 0%| | 0.00/1.06M [00:00, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"ba7a1184512d48be8125c18fa34182fb"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["Generating train split: 0%| | 0/21008 [00:00, ? examples/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"cceb1bc4b4d24816ae421f24b5b96b98"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["Generating validation split: 0%| | 0/5000 [00:00, ? 
examples/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"08199005375444a781594d286578be46"}},"metadata":{}},{"output_type":"stream","name":"stdout","text":["['B-DT', 'I-DT', 'B-LC', 'I-LC', 'B-OG', 'I-OG', 'B-PS', 'I-PS', 'B-QT', 'I-QT', 'B-TI', 'I-TI', 'O']\n"]}]},{"cell_type":"code","source":["from kobert_transformers import get_kobert_model, get_tokenizer\n","from transformers import BertForTokenClassification\n","\n","model_name = \"mmoonssun/klue_ner_kobert\"\n","model = BertForTokenClassification.from_pretrained(model_name, num_labels=13) # num_labels는 데이터셋의 라벨 수에 맞춰 조정\n","tokenizer = KoBERTTokenizer.from_pretrained('skt/kobert-base-v1')"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":167,"referenced_widgets":["5dc05eaa6fe34306ae2796d69d16fa68","97705820fa3b49ec8efe7bbf014b662e","eb7fb0ae872f484bbf518397d4c8dcdf","83283ced534a465b9074b3fe75a8b08b","dfa410ba3b724894ab59956a72d98295","63e5abaded4342519019f1de5770a076","2edd41dcd74e440b980099a3d559310a","67d2af8067d4443ba22c11afc38671f7","59d0ce4816644730b431bb7285df89f1","e18329aa4583426ab72de02174098b18","b7698e5108764415b8f174a4c2abf82b","9ccbbdd69f514c5fb754fa943f6cf1ad","959e73767aff4a0caf8472b13fcb1a4b","ef299b638e534afe9ed650a78f85ea59","2288b0eddbad47e597546036aafbd675","2fb1159d0c42467383adcf891620ca9d","de6771853ca843e1b55eaef438e717bb","6fad117f7b04450aa71c8e4303938c37","6c1cf9c5f0d7479daa5375ad513bab59","e0dfec409b014823b611f24c448d1185","06cceeec756945b48afc750cf2e9bd8b","4f922444e8624286b81a2d2f1a7acf77","e53a751d94134087a276e9bf71ca5716","f0078b915b4743358760179db2899105","b3d21512f52847d6bb56dd4eae3aeb5f","b0f27f0ea5f442b791ae7fa2c11f1cb3","79d37962b77c4e62acf6129ca71e9fca","098d8ab617ab4da88070a386136163af","39aa786ad32b489598f23409c3756cfc","c6787d1bd82944bebf94d4ca553d6dc8","b39fcdc03fef40b8be066a9ee3b7da43","f1bc4da9f18747bfa1d632916bdd252f","6129970c0f3e4211a8c0b9be920b2e2e"]},"id":"bCyP8tgbv5kU","executionInfo":{"status":
"ok","timestamp":1712127868697,"user_tz":-540,"elapsed":6801,"user":{"displayName":"송문선","userId":"02769911744591628940"}},"outputId":"cefbaef8-8163-4020-8168-ba4ef23ce6ff"},"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/plain":["tokenizer_config.json: 0%| | 0.00/432 [00:00, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5dc05eaa6fe34306ae2796d69d16fa68"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["spiece.model: 0%| | 0.00/371k [00:00, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"9ccbbdd69f514c5fb754fa943f6cf1ad"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["special_tokens_map.json: 0%| | 0.00/244 [00:00, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"e53a751d94134087a276e9bf71ca5716"}},"metadata":{}},{"output_type":"stream","name":"stderr","text":["The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. \n","The tokenizer class you load from this checkpoint is 'XLNetTokenizer'. 
\n","The class this function is called from is 'KoBERTTokenizer'.\n"]}]},{"cell_type":"code","source":["import re\n","\n","def predict_entities(text, model, tokenizer, id2tag):\n"," # GPU 사용 설정\n"," device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n"," model.to(device)\n","\n"," # 평가 모드로 설정\n"," model.eval()\n","\n"," # 입력 문장 토크나이징 및 텐서로 변환\n"," inputs = tokenizer(text, padding=True, truncation=True, return_tensors=\"pt\", max_length=512)\n"," input_ids = inputs[\"input_ids\"].to(device)\n"," attention_mask = inputs[\"attention_mask\"].to(device)\n","\n"," # 예측 수행\n"," with torch.no_grad():\n"," outputs = model(input_ids, attention_mask=attention_mask)\n"," logits = outputs.logits\n","\n"," # 예측 결과에서 가장 높은 확률을 가진 태그 ID를 추출\n"," predictions = torch.argmax(logits, dim=2)\n","\n"," # ID를 태그로 변환\n"," predicted_tags = [id2tag[id.item()] for id in predictions[0]]\n","\n"," # 토큰화된 텍스트와 예측된 태그 결합\n"," tokens = tokenizer.convert_ids_to_tokens(input_ids[0].tolist())\n"," token_tag_pairs = [(token, tag) for token, tag in zip(tokens, predicted_tags) if token not in [\"[CLS]\", \"[SEP]\", \"[PAD]\", \"\"]]\n","\n"," # '▁' 문자를 공백으로 대체하여 보다 자연스러운 출력을 생성\n"," token_tag_pairs = [(token.replace('▁', ' '), tag) for token, tag in token_tag_pairs]\n","\n"," return token_tag_pairs\n","\n","def find_career_status(text):\n"," # '경력' 다음에 오는 '유', '무', '없', '있' 찾기\n"," pattern = r'경력\\s*:\\s*(유|무|없|있)'\n","\n"," # 문자열에서 패턴에 해당하는 부분 찾기\n"," match = re.search(pattern, text)\n","\n"," # 찾은 값을 변수에 저장하고 처리\n"," if match:\n"," raw_career = match.group(1) # 첫 번째 그룹(유|무|없|있)을 추출\n"," # '없'이나 '있'을 각각 '무', '유'로 변환\n"," if raw_career == '없':\n"," career = '무'\n"," elif raw_career == '있':\n"," career = '유'\n"," else:\n"," career = raw_career\n","\n"," return career\n"," else:\n"," return \"경력 유무를 찾을 수 없습니다.\"\n","\n","def find_phone_number(text):\n"," # 정규 표현식으로 전화번호 패턴 찾기\n"," # 패턴 설명: '010'으로 시작하며, '-'가 있을 수도 있고 없을 수도 있으며, 숫자가 연속으로 나타남\n"," pattern = 
r'010-?\\d{4}-?\\d{4}'\n","\n"," # 문자열에서 패턴에 해당하는 부분 찾기\n"," match = re.search(pattern, text)\n","\n"," # 찾은 전화번호를 변수에 저장하고 출력\n"," if match:\n"," phone_number = match.group()\n"," return phone_number\n"," else:\n"," return \"전화번호를 찾을 수 없습니다.\""],"metadata":{"id":"BPaJ4r6PwEbG"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# 예시 문장\n","text = \"김준호 경기도 과천시 ,88살 .전화번호는 010-2093-6627, 경력 : 유\"\n","\n","# 모델을 사용하여 문장에서 개체 추출\n","predicted_entities = predict_entities(text, model, tokenizer,id2tag)\n","career = find_career_status(text)\n","phone_number = find_phone_number(text)\n","# 결과 출력\n","print(predicted_entities)\n","print(career)\n","print(phone_number)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"c573a34EwF0a","executionInfo":{"status":"ok","timestamp":1712127877232,"user_tz":-540,"elapsed":737,"user":{"displayName":"송문선","userId":"02769911744591628940"}},"outputId":"2a30be33-440d-453a-b770-c50a03a8ebc9"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["[(' 김', 'B-PS'), ('준호', 'B-PS'), (' 경기도', 'B-LC'), (' ', 'B-LC'), ('과', 'B-LC'), ('천', 'B-LC'), ('시', 'B-LC'), (' ', 'B-LC'), (',', 'O'), ('88', 'B-QT'), ('살', 'O'), (' ', 'O'), ('.', 'O'), ('전', 'O'), ('화', 'O'), ('번호', 'O'), ('는', 'O'), (' 0', 'O'), ('10', 'O'), ('-20', 'O'), ('93', 'O'), ('-', 'O'), ('66', 'O'), ('27', 'O'), (',', 'O'), (' 경력', 'O'), (' :', 'O'), (' 유', 'O')]\n","유\n","010-2093-6627\n"]}]},{"cell_type":"code","source":["# 개체 유형별로 정보를 추출하고 결합하는 함수 정의\n","def extract_and_combine_entities(predicted_entities):\n"," name = \"\"\n"," location = \"\"\n"," age = \"\"\n","\n"," for token, tag in predicted_entities:\n"," if tag == 'B-PS': # 이름 추출\n"," name += token.strip()\n"," elif tag == 'B-LC': # 위치 추출\n"," if token == \" \":\n"," location += token\n"," else:\n"," location += token.strip()\n"," elif tag == 'B-QT': # 나이(수량) 추출\n"," age = token.strip()\n","\n"," # 결과 반환\n"," return {\"name\": name, \"location\": location, 
\"age\": age}\n","\n","# 함수를 사용하여 변수에 저장\n","entities_combined = extract_and_combine_entities(predicted_entities)\n","\n","# 결과 출력\n","entities_combined[\"career\"] = career\n","entities_combined[\"phone_number\"] = phone_number\n","print(entities_combined)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"bk9Rk0ctwH96","executionInfo":{"status":"ok","timestamp":1712127881298,"user_tz":-540,"elapsed":353,"user":{"displayName":"송문선","userId":"02769911744591628940"}},"outputId":"991fb040-5652-4a92-8aff-0dea8844b338"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["{'name': '김준호', 'location': '경기도 과천시 ', 'age': '88', 'career': '유', 'phone_number': '010-2093-6627'}\n"]}]},{"cell_type":"code","source":["pip install pymongo"],"metadata":{"id":"O-FrIWDOwKUK"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["from pymongo import MongoClient\n","\n","# MongoDB 서버에 연결. 여기서는 로컬 MongoDB 인스턴스를 사용합니다.\n","client = MongoClient('mongodb://localhost:27017/')\n","\n","# 'mydatabase'라는 데이터베이스 선택. 없다면 새로 생성됩니다.\n","db = client['mydatabase']\n","\n","# 'people'이라는 컬렉션 선택. 없다면 새로 생성됩니다.\n","collection = db['people']\n","\n","# 딕셔너리를 MongoDB 컬렉션에 삽입\n","result = collection.insert_one(entities_combined)\n","\n","# 삽입된 문서의 ID 출력\n","print(\"Inserted document ID:\", result.inserted_id)"],"metadata":{"id":"cThEDFYYwM4f"},"execution_count":null,"outputs":[]}]}
\ No newline at end of file
diff --git a/DataExtract/colab/logs_retrained/events.out.tfevents.1711953590.e19a0f9d0fda.879.0 b/DataExtract/colab/logs_retrained/events.out.tfevents.1711953590.e19a0f9d0fda.879.0
new file mode 100644
index 0000000000..6e8818f649
Binary files /dev/null and b/DataExtract/colab/logs_retrained/events.out.tfevents.1711953590.e19a0f9d0fda.879.0 differ
diff --git a/DataExtract/colab/logs_retrained/events.out.tfevents.1712073374.fc08e119c972.149.0 b/DataExtract/colab/logs_retrained/events.out.tfevents.1712073374.fc08e119c972.149.0
new file mode 100644
index 0000000000..fe3bcb5eb1
Binary files /dev/null and b/DataExtract/colab/logs_retrained/events.out.tfevents.1712073374.fc08e119c972.149.0 differ
diff --git a/DataExtract/colab/logs_retrained/events.out.tfevents.1712136011.ebca3ff1491d.752.0 b/DataExtract/colab/logs_retrained/events.out.tfevents.1712136011.ebca3ff1491d.752.0
new file mode 100644
index 0000000000..66d6a152c3
Binary files /dev/null and b/DataExtract/colab/logs_retrained/events.out.tfevents.1712136011.ebca3ff1491d.752.0 differ
diff --git a/DataExtract/dummy/db_score.py b/DataExtract/dummy/db_score.py
new file mode 100644
index 0000000000..c735018056
--- /dev/null
+++ b/DataExtract/dummy/db_score.py
@@ -0,0 +1,34 @@
+# db_score.py
+
+from model import load_model_and_tokenizer, predict_entities
+from data_processing import find_career_status, find_phone_number, extract_and_combine_entities
+from datasets import load_dataset
+from config.db import connect_db, get_collection
+
+# Connect to the MongoDB database
+db = connect_db()
+collection = db['ExtractedEntities'] # target collection; change the name here if needed
+
+# Load the KLUE NER dataset (in this script it is only used to read the tag names)
+dataset = load_dataset("klue", "ner")
+tag_list = dataset['train'].features['ner_tags'].feature.names
+tag2id = {tag: id for id, tag in enumerate(tag_list)}
+id2tag = {id: tag for tag, id in tag2id.items()}
+
+# Load the model and tokenizer
+model, tokenizer = load_model_and_tokenizer()
+
+# Sample input text
+text = "25/ 김준호 /서초구 거주/경력:유/전화번호:010-0000-0000"
+
+# Extract the entities and merge them into a single record
+predicted_entities = predict_entities(text, model, tokenizer, id2tag)
+entities_combined = extract_and_combine_entities(predicted_entities)
+entities_combined["career"] = find_career_status(text)
+entities_combined["phonenumber"] = find_phone_number(text)
+# The next two fields are hard-coded placeholder values, not extracted from the text.
+entities_combined["sex"] = "남"
+entities_combined["RRN"] = "000000-0000000"
+
+# Store the record in MongoDB
+insert_result = collection.insert_one(entities_combined)
+print(f"삽입된 문서 ID: {insert_result.inserted_id}")
diff --git a/DataExtract/dummy/example.py b/DataExtract/dummy/example.py
new file mode 100644
index 0000000000..e0e1043ba1
--- /dev/null
+++ b/DataExtract/dummy/example.py
@@ -0,0 +1,9 @@
+from config.db import connect_db
+import pprint
+
+db = connect_db()
+collection = db['ExtractedEntities']
+
+# Show which collections exist; the bare call used to discard its result.
+pprint.pprint(db.list_collection_names())
+
+# Dump every stored document in a readable layout.
+for document in collection.find():
+    pprint.pprint(document)
\ No newline at end of file
diff --git a/DataExtract/dummy/rawcode.py b/DataExtract/dummy/rawcode.py
new file mode 100644
index 0000000000..ad2dea32aa
--- /dev/null
+++ b/DataExtract/dummy/rawcode.py
@@ -0,0 +1,133 @@
+from transformers import BertTokenizerFast, BertForTokenClassification, AdamW
+from transformers import Trainer, TrainingArguments
+from datasets import load_dataset, load_metric
+import torch
+from torch.utils.data import DataLoader
+from torch.nn.utils.rnn import pad_sequence
+import numpy as np
+from kobert_tokenizer import KoBERTTokenizer
+from kobert_transformers import get_kobert_model, get_tokenizer
+from transformers import BertForTokenClassification
+
+# Load the KLUE NER dataset
+dataset = load_dataset("klue", "ner")
+
+# Inspect the tag list
+tag_list = dataset['train'].features['ner_tags'].feature.names
+print(tag_list)
+
+# Build the tag2id and id2tag lookup dictionaries
+tag2id = {tag: id for id, tag in enumerate(tag_list)}
+id2tag = {id: tag for tag, id in tag2id.items()}
+
+model_name = "mmoonssun/klue_ner_kobert"
+model = BertForTokenClassification.from_pretrained(model_name, num_labels=13) # num_labels must match the number of labels in the dataset
+tokenizer = KoBERTTokenizer.from_pretrained('skt/kobert-base-v1')
+
+import re
+
+def predict_entities(text, model, tokenizer, id2tag):
+    """Tag every token of ``text`` with the label the model scores highest.
+
+    Args:
+        text: Input sentence.
+        model: Token-classification model whose output exposes ``.logits``.
+        tokenizer: Tokenizer that is callable on text and provides
+            ``convert_ids_to_tokens``.
+        id2tag: Mapping from label id to tag name.
+
+    Returns:
+        A list of ``(token, tag)`` tuples. The ``[CLS]``, ``[SEP]`` and
+        ``[PAD]`` tokens and empty tokens are dropped, and the ``▁``
+        marker in each remaining token is replaced by a space.
+    """
+    # Use the GPU when one is available.
+    target = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.to(target)
+    model.eval()  # evaluation mode
+
+    # Tokenize the sentence and move the tensors to the chosen device.
+    encoded = tokenizer(text, padding=True, truncation=True, return_tensors="pt", max_length=512)
+    input_ids = encoded["input_ids"].to(target)
+    attention_mask = encoded["attention_mask"].to(target)
+
+    with torch.no_grad():
+        logits = model(input_ids, attention_mask=attention_mask).logits
+
+    # Highest-scoring label id per token of the first sequence, mapped to tag names.
+    tags = [id2tag[label_id] for label_id in torch.argmax(logits, dim=2)[0].tolist()]
+    tokens = tokenizer.convert_ids_to_tokens(input_ids[0].tolist())
+
+    skipped = ("[CLS]", "[SEP]", "[PAD]", "")
+    pairs = []
+    for token, tag in zip(tokens, tags):
+        if token in skipped:
+            continue
+        pairs.append((token.replace('▁', ' '), tag))
+    return pairs
+
+def find_career_status(text):
+    """Read the career flag written as ``경력 : <flag>`` in ``text``.
+
+    Returns:
+        '유' or '무' ('있' is mapped to '유' and '없' to '무'), or a
+        "not found" message when the pattern does not occur.
+    """
+    found = re.search(r'경력\s*:\s*(유|무|없|있)', text)
+    if found is None:
+        return "경력 유무를 찾을 수 없습니다."
+
+    # Fold the 없/있 wording into the 무/유 flags; 유/무 pass through unchanged.
+    canonical = {'없': '무', '있': '유'}
+    flag = found.group(1)
+    return canonical.get(flag, flag)
+
+def find_phone_number(text):
+    """Return the first ``010`` phone number in ``text``.
+
+    The number is ``010`` followed by two four-digit groups, each
+    optionally preceded by a hyphen. A "not found" message is returned
+    when nothing matches.
+    """
+    found = re.search(r'010-?\d{4}-?\d{4}', text)
+    return found.group() if found else "전화번호를 찾을 수 없습니다."
+
+def extract_and_combine_entities(predicted_entities):
+    """Collapse ``(token, tag)`` pairs into name, location and age fields.
+
+    ``B-PS`` tokens are concatenated into the name and ``B-LC`` tokens
+    into the location (a token that is exactly one space is kept as a
+    separator). The last ``B-QT`` or ``B-DT`` token becomes the age.
+
+    Returns:
+        ``{"name": ..., "location": ..., "age": ...}``.
+    """
+    name_parts = []
+    location_parts = []
+    age = ""
+
+    for token, tag in predicted_entities:
+        if tag == 'B-PS':
+            name_parts.append(token.strip())
+        elif tag == 'B-LC':
+            location_parts.append(token if token == " " else token.strip())
+        elif tag in ('B-QT', 'B-DT'):
+            age = token.strip()  # a later match overwrites an earlier one
+
+    return {"name": "".join(name_parts), "location": "".join(location_parts), "age": age}
+
+# Sample sentence
+text = "25/ 김준호 /서초구 거주/경력:유/전화번호:010-0000-0000"
+
+# Run the model to extract entities from the sentence
+predicted_entities = predict_entities(text, model, tokenizer, id2tag)
+career = find_career_status(text)
+phone_number = find_phone_number(text)
+
+# Merge the tagged tokens into a single record
+entities_combined = extract_and_combine_entities(predicted_entities)
+
+# Attach the regex-derived fields and print the result
+entities_combined["career"] = career
+entities_combined["phone_number"] = phone_number
+
+print(entities_combined)
diff --git a/DataExtract/package.json b/DataExtract/package.json
new file mode 100644
index 0000000000..d8aff416ae
--- /dev/null
+++ b/DataExtract/package.json
@@ -0,0 +1,15 @@
+{
+ "name": "capstone-2024-35",
+ "version": "1.0.0",
+ "description": "Capstone Project 2024-35",
+ "main": "index.js",
+ "scripts": {
+ "start": "node src/mongodb_store.js"
+ },
+ "dependencies": {
+ "dotenv": "^16.0.0",
+ "mongodb": "^4.10.0"
+ },
+ "author": "",
+ "license": "ISC"
+ }
\ No newline at end of file
diff --git a/DataExtract/src/__init__.py b/DataExtract/src/__init__.py
new file mode 100644
index 0000000000..8b13789179
--- /dev/null
+++ b/DataExtract/src/__init__.py
@@ -0,0 +1 @@
+
diff --git a/DataExtract/src/__pycache__/data_processing.cpython-310.pyc b/DataExtract/src/__pycache__/data_processing.cpython-310.pyc
new file mode 100644
index 0000000000..9966b02f73
Binary files /dev/null and b/DataExtract/src/__pycache__/data_processing.cpython-310.pyc differ
diff --git a/DataExtract/src/__pycache__/employee.cpython-310.pyc b/DataExtract/src/__pycache__/employee.cpython-310.pyc
new file mode 100644
index 0000000000..127c86afcd
Binary files /dev/null and b/DataExtract/src/__pycache__/employee.cpython-310.pyc differ
diff --git a/DataExtract/src/__pycache__/model.cpython-310.pyc b/DataExtract/src/__pycache__/model.cpython-310.pyc
new file mode 100644
index 0000000000..b7b2e92730
Binary files /dev/null and b/DataExtract/src/__pycache__/model.cpython-310.pyc differ
diff --git a/DataExtract/src/__pycache__/model.cpython-39.pyc b/DataExtract/src/__pycache__/model.cpython-39.pyc
new file mode 100644
index 0000000000..0c89d8ac7f
Binary files /dev/null and b/DataExtract/src/__pycache__/model.cpython-39.pyc differ
diff --git a/DataExtract/src/config/__pycache__/db.cpython-310.pyc b/DataExtract/src/config/__pycache__/db.cpython-310.pyc
new file mode 100644
index 0000000000..2c30ec1562
Binary files /dev/null and b/DataExtract/src/config/__pycache__/db.cpython-310.pyc differ
diff --git a/DataExtract/src/config/db.py b/DataExtract/src/config/db.py
new file mode 100644
index 0000000000..cc361b30bf
--- /dev/null
+++ b/DataExtract/src/config/db.py
@@ -0,0 +1,18 @@
+# src/config/db.py
+
+import os
+from dotenv import load_dotenv
+from pymongo import MongoClient
+
+# 환경 변수 로드
+load_dotenv()
+mongodb_uri = os.getenv('MONGODB_URI')
+
+# MongoDB connection setup.
+# One MongoClient is shared by the whole process: each client owns its own
+# connection pool, so building a new one per call (as get_collection used to
+# do through connect_db) opens new connections on every request.
+# NOTE(review): when MONGODB_URI is unset, os.getenv returns None; confirm
+# that MongoClient's default target is acceptable in that case.
+_client = None
+
+
+def connect_db():
+    """Return the ``Authusers`` database, creating the client on first use."""
+    global _client
+    if _client is None:
+        _client = MongoClient(mongodb_uri)
+    return _client['Authusers']  # change the database name here if needed
+
+
+def get_collection(collection_name):
+    """Return the collection named ``collection_name`` from the database."""
+    return connect_db()[collection_name]
\ No newline at end of file
diff --git a/DataExtract/src/data_processing.py b/DataExtract/src/data_processing.py
new file mode 100644
index 0000000000..b3ca521959
--- /dev/null
+++ b/DataExtract/src/data_processing.py
@@ -0,0 +1,38 @@
+import re
+
+def find_career_status(text):
+    """Extract the career flag or career description from ``text``.
+
+    An explicit ``경력`` marker is tried first. The marker may be followed
+    by ``:`` or a particle (은/는/이/가), and the text after it, up to the
+    next comma or period, is classified:
+
+    * starts with 유 or 있 -> ``'유'``
+    * starts with 무 or 없 -> ``'무'``
+    * anything else -> the matched ``경력 ...`` phrase, returned as a
+      free-form description
+
+    Only when no ``경력`` marker exists does the function fall back to the
+    first bare 없/있/무/유 character in the text.
+
+    The previous single-pattern version classified the whole match, which
+    always began with ``경력``, so ``경력:유`` was never recognised. It also
+    let a stray 유/무 earlier in the text (for example inside a name)
+    shadow the real marker.
+
+    Returns:
+        ``'유'``, ``'무'``, a description string, or a "not found" message.
+    """
+    explicit = re.search(r'경력\s*(?::|은|는|이|가)?\s*([^,\.]*)', text)
+    if explicit:
+        described = explicit.group(1).strip()
+        if described.startswith(('유', '있')):
+            return '유'
+        if described.startswith(('무', '없')):
+            return '무'
+        # Neither flag: hand back the whole phrase, as before.
+        return explicit.group(0).strip()
+
+    # Fallback kept for backward compatibility: a bare flag character
+    # anywhere in the text. NOTE(review): this is noisy, since any word
+    # containing 유/무/있/없 triggers it; consider dropping it.
+    bare = re.search(r'없|있|무|유', text)
+    if bare:
+        return '유' if bare.group() in ('유', '있') else '무'
+    return "경력 유무를 찾을 수 없습니다."
+
+def find_phone_number(text):
+    """Return the first ``010`` phone number in ``text``, hyphens optional.
+
+    A "not found" message is returned when nothing matches.
+    """
+    found = re.search(r'010-?\d{4}-?\d{4}', text)
+    if found is None:
+        return "전화번호를 찾을 수 없습니다."
+    return found.group()
+
+def extract_and_combine_entities(predicted_entities):
+    """Collapse ``(token, tag)`` pairs into ``name``, ``local`` and ``age``.
+
+    ``B-PS`` tokens form the name and ``B-QT``/``B-DT`` tokens form the
+    age; both are space-separated with the outer whitespace trimmed.
+    ``B-LC`` tokens are concatenated into ``local`` without trimming the
+    result, and a token that is exactly one space is kept as a separator.
+    """
+    names, places, ages = [], [], []
+    for token, tag in predicted_entities:
+        if tag == 'B-PS':
+            names.append(token.strip())
+        elif tag == 'B-LC':
+            places.append(token if token == " " else token.strip())
+        elif tag in ['B-QT', 'B-DT']:
+            ages.append(token.strip())
+
+    return {
+        "name": " ".join(names).strip(),
+        "local": "".join(places),  # deliberately left untrimmed
+        "age": " ".join(ages).strip(),
+    }
diff --git a/DataExtract/src/employee.py b/DataExtract/src/employee.py
new file mode 100644
index 0000000000..6ecf148fb2
--- /dev/null
+++ b/DataExtract/src/employee.py
@@ -0,0 +1,68 @@
+# employee.py
+
+from datetime import datetime
+from bson import ObjectId
+from config.db import get_collection
+
+class Employee:
+    """In-memory representation of one extracted employee record.
+
+    Args:
+        user: Id of the owning user; converted with ``ObjectId(user)``.
+        name, sex, local, career, age: Stored as given.
+        rrn: Resident registration number.
+        created_at, updated_at: Optional timestamps. When omitted, both
+            default to the same "now" value.
+    """
+
+    def __init__(self, user, name, sex, local, rrn, career, age, created_at=None, updated_at=None):
+        # Read the clock once so a new record has identical createdAt and
+        # updatedAt instead of two values a few microseconds apart.
+        now = datetime.utcnow()
+        self.user = ObjectId(user)
+        self.name = name
+        self.sex = sex
+        self.local = local
+        self.rrn = rrn  # resident registration number
+        self.age = age
+        self.career = career
+        self.created_at = created_at if created_at else now
+        self.updated_at = updated_at if updated_at else now
+
+    def to_dict(self):
+        """Return the record as the dict that is stored in MongoDB."""
+        # The phone number is not part of the stored document.
+        return {
+            'user': self.user,
+            'name': self.name,
+            'sex': self.sex,
+            'local': self.local,
+            'RRN': self.rrn,
+            'age': self.age,
+            'career': self.career,
+            'createdAt': self.created_at,
+            'updatedAt': self.updated_at
+        }
+
+class EmployeeRepository:
+    """Repository that manages Employee documents in ``ExtractedEntities``."""
+
+    def __init__(self):
+        self.collection = get_collection('ExtractedEntities')
+
+    def insert(self, employee: Employee):
+        """Insert a new Employee and return the id of the stored document."""
+        return self.collection.insert_one(employee.to_dict()).inserted_id
+
+    def find_all(self):
+        """Return every stored Employee document as a list."""
+        return list(self.collection.find())
+
+    def find_by_name(self, name):
+        """Return the Employee documents whose ``name`` equals ``name``."""
+        return list(self.collection.find({'name': name}))
+
+    def update(self, employee_id, updated_fields):
+        """Apply ``updated_fields`` to the document with the given id.
+
+        ``updatedAt`` is refreshed unless the caller supplies it, so the
+        timestamp written at insert time does not go stale. The caller's
+        dict is not modified.
+
+        Returns:
+            True when a document with that id was found.
+        """
+        changes = dict(updated_fields)
+        changes.setdefault('updatedAt', datetime.utcnow())
+        result = self.collection.update_one({'_id': ObjectId(employee_id)}, {'$set': changes})
+        return result.matched_count > 0
+
+    def delete(self, employee_id):
+        """Delete the document with the given id; return True if one was removed."""
+        result = self.collection.delete_one({'_id': ObjectId(employee_id)})
+        return result.deleted_count > 0
\ No newline at end of file
diff --git a/DataExtract/src/main.py b/DataExtract/src/main.py
new file mode 100644
index 0000000000..4641d76432
--- /dev/null
+++ b/DataExtract/src/main.py
@@ -0,0 +1,87 @@
+# main.py
+
+from model import load_model_and_tokenizer, predict_entities
+from data_processing import find_career_status, find_phone_number, extract_and_combine_entities
+from datasets import load_dataset
+from config.db import connect_db, get_collection
+from employee import Employee, EmployeeRepository
+from flask import Flask, request, render_template, redirect, url_for
+
+app = Flask(__name__)
+
+# Connect to the MongoDB database
+db = connect_db()
+collection = get_collection('ExtractedEntities') # target collection; change the name here if needed
+
+
+# Load the KLUE NER dataset (in this module it is only used to read the tag names)
+dataset = load_dataset("klue", "ner")
+tag_list = dataset['train'].features['ner_tags'].feature.names
+tag2id = {tag: id for id, tag in enumerate(tag_list)}
+id2tag = {id: tag for tag, id in tag2id.items()}
+
+# Load the model and tokenizer
+model, tokenizer = load_model_and_tokenizer()
+
+# Sample text
+#text = "송문선 / 24/서초구 거주/경력은 사무실 철거 해봤습니다."
+
+@app.route('/', methods=['GET', 'POST'])
+def index():
+    """Show the input form; on POST, extract and store the submitted text.
+
+    A POST without a ``text`` field, or with only whitespace, stores
+    nothing and redirects back to the form. Previously a missing field
+    raised a 400 error and whitespace-only input was processed as a
+    record.
+    """
+    if request.method == 'POST':
+        text = request.form.get('text', '').strip()
+        if text:
+            combine(text)
+        return redirect(url_for('index'))
+    return render_template('index.html')
+
+
+
+def combine(text):
+    """Extract entities from ``text``, store them as an Employee and print them.
+
+    The phone number, sex, RRN and owning user id are hard-coded
+    placeholder values. Spaces and commas are removed from the name, age
+    and local fields before the record is stored.
+    """
+    record = extract_and_combine_entities(predict_entities(text, model, tokenizer, id2tag))
+    record["career"] = find_career_status(text)
+    record["phonenumber"] = "010-0000-0000"
+    record["sex"] = "남"
+    record["RRN"] = "000000-0000000"
+
+    # Remove spaces and commas from the model-derived fields.
+    for field in ("name", "age", "local"):
+        record[field] = record[field].replace(' ', '').replace(',', '')
+
+    employee = Employee(
+        user='609b8b8f8e4f5b88f8e8e8e8',
+        name=record["name"],
+        sex=record["sex"],
+        local=record["local"],
+        rrn=record["RRN"],
+        career=record["career"],
+        age=record["age"],
+    )
+    # Store the record in MongoDB.
+    EmployeeRepository().insert(employee)
+    print(record)
+
+@app.route('/employees', methods=['GET'])
+def list_employees():
+    """Render ``employees.html`` with every stored employee document."""
+    return render_template('employees.html', employees=EmployeeRepository().find_all())
+
+if __name__ == '__main__':
+    import os
+
+    # Debug mode enables the interactive Werkzeug debugger, which lets
+    # anyone who can reach the server run arbitrary code. It is therefore
+    # off by default and only enabled with FLASK_DEBUG=1.
+    app.run(debug=os.environ.get('FLASK_DEBUG') == '1')
diff --git a/DataExtract/src/model.py b/DataExtract/src/model.py
new file mode 100644
index 0000000000..c0e62192c1
--- /dev/null
+++ b/DataExtract/src/model.py
@@ -0,0 +1,34 @@
+from transformers import BertForTokenClassification
+from kobert_tokenizer import KoBERTTokenizer
+import torch
+import re
+
+def load_model_and_tokenizer():
+    """Load the token-classification model and the KoBERT tokenizer.
+
+    Returns:
+        A ``(model, tokenizer)`` tuple. The model is loaded from
+        ``mmoonssun/klue_ner_kobert`` with 13 labels and the tokenizer
+        from ``skt/kobert-base-v1``.
+    """
+    ner_model = BertForTokenClassification.from_pretrained("mmoonssun/klue_ner_kobert", num_labels=13)
+    kobert_tokenizer = KoBERTTokenizer.from_pretrained('skt/kobert-base-v1')
+    return ner_model, kobert_tokenizer
+def preprocess_text(text):
+    """Normalise ``text`` before tokenization.
+
+    Each run of digits is surrounded by spaces, every ``/`` becomes
+    `` , ``, and runs of whitespace are then collapsed to one space with
+    the ends trimmed.
+    """
+    spaced = re.sub(r'(\d+)', r' \1 ', text).replace('/', ' , ')
+    return re.sub(r'\s+', ' ', spaced).strip()
+def predict_entities(text, model, tokenizer, id2tag):
+    """Preprocess ``text`` and tag each token with the top-scoring label.
+
+    Args:
+        text: Raw input sentence; it is passed through ``preprocess_text``.
+        model: Token-classification model whose output exposes ``.logits``.
+        tokenizer: Tokenizer that is callable on text and provides
+            ``convert_ids_to_tokens``.
+        id2tag: Mapping from label id to tag name.
+
+    Returns:
+        A list of ``(token, tag)`` tuples. The ``[CLS]``, ``[SEP]`` and
+        ``[PAD]`` tokens and empty tokens are dropped, and ``▁`` is
+        replaced by a space. The list is also printed.
+    """
+    run_on = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.to(run_on)
+    model.eval()
+
+    cleaned = preprocess_text(text)  # apply text preprocessing
+    batch = tokenizer(cleaned, padding=True, truncation=True, return_tensors="pt", max_length=512)
+    input_ids = batch["input_ids"].to(run_on)
+    attention_mask = batch["attention_mask"].to(run_on)
+
+    with torch.no_grad():
+        logits = model(input_ids, attention_mask=attention_mask).logits
+
+    # Highest-scoring label per token of the first sequence.
+    tags = [id2tag[label.item()] for label in torch.argmax(logits, dim=2)[0]]
+    tokens = tokenizer.convert_ids_to_tokens(input_ids[0].tolist())
+
+    token_tag_pairs = []
+    for token, tag in zip(tokens, tags):
+        if token not in ["[CLS]", "[SEP]", "[PAD]", ""]:
+            token_tag_pairs.append((token.replace('▁', ' '), tag))
+    print(token_tag_pairs)
+    return token_tag_pairs
diff --git a/DataExtract/src/requirements.txt b/DataExtract/src/requirements.txt
new file mode 100644
index 0000000000..8c5fbbbbfe
--- /dev/null
+++ b/DataExtract/src/requirements.txt
@@ -0,0 +1,8 @@
+torch
+transformers[torch]
+datasets
+seqeval
+kobert-transformers
+onnxruntime==1.8.0
+git+https://github.com/SKTBrain/KoBERT.git@master
+git+https://github.com/SKTBrain/KoBERT.git#egg=kobert_tokenizer&subdirectory=kobert_hf
diff --git a/DataExtract/src/templates/employees.html b/DataExtract/src/templates/employees.html
new file mode 100644
index 0000000000..8f0ccb707f
--- /dev/null
+++ b/DataExtract/src/templates/employees.html
@@ -0,0 +1,31 @@
+
+
+
+
+
+ List of Employees
+
+
+
Employees
+
+
+
Name
+
Sex
+
Local
+
RRN
+
Career
+
Age
+
+ {% for employee in employees %}
+
+
{{ employee.name }}
+
{{ employee.sex }}
+
{{ employee.local }}
+
{{ employee.rrn }}
+
{{ employee.career }}
+
{{ employee.age }}
+
+ {% endfor %}
+
+
+
\ No newline at end of file
diff --git a/DataExtract/src/templates/index.html b/DataExtract/src/templates/index.html
new file mode 100644
index 0000000000..4db3e16683
--- /dev/null
+++ b/DataExtract/src/templates/index.html
@@ -0,0 +1,16 @@
+
+
+
+
+
+ Entity Extraction and DB Insertion
+
+
+
Enter Text for Entity Extraction
+
+
+
\ No newline at end of file
diff --git a/DataExtract/src/utilities.py b/DataExtract/src/utilities.py
new file mode 100644
index 0000000000..8b13789179
--- /dev/null
+++ b/DataExtract/src/utilities.py
@@ -0,0 +1 @@
+
diff --git a/README.md b/README.md
index b0c42847c6..81876ccd68 100644
--- a/README.md
+++ b/README.md
@@ -1,76 +1,142 @@
-# Welcome to GitHub
+# 아웃소싱 회사를 위한 인력 PMS 개발
+## 1. 프로젝트 소개
+
+
+
+거래처 증가에 따라 현장의 인원과 다양성이 늘어나면서, 기존처럼 일일이 수작업으로 문자 지원을 받고 인원을 배치하기가 벅차게 되었다. 이에 AI를 활용한 유연하고 자동화된 PMS를 고안하게 되었다.
-캡스톤 팀 생성을 축하합니다.
+기존에는 해당 현장에 맞는 인원을 수작업으로 일일이 생각하고, 찾아 연락하고, 배치해야 하는 번거로움이 있었다. 본 시스템은 기존에 구축된 인적 DB를 바탕으로, 현장 주소와 간단한 업무 내용, 필요한 인원 수를 입력하면 해당 현장에 가장 적합한 인원을 자동으로 선별한다.
+이에 그치지 않고, 사용자가 원한다면 WEB발신을 통한 문자 전송까지도 가능케 한다. 더 나아가, 현장이 종료되면 매번 계좌 정보를 받아 일일이 급여 입금을 하는 행위 역시 상당한 시간이 소요되었는데, 인적정보에 더하여 최초 한번 계좌정보를 입력하여 두면, 간단한 클릭만으로 일괄 계좌이체 및 개별 계좌이체도 가능하게 할 예정이다.
-## 팀소개 및 페이지를 꾸며주세요.
+이처럼 시스템 하나로 여러 업무를 한 번에 처리할 수 있어 시간 효율성이 높아지고, 이는 다른 업무를 볼 시간의 증대로 이어진다. 그렇게 되면 장기적으로는 매출 증가 및 수익 향상을 이뤄낼 수 있다. 즉, 아웃소싱 업체에서 하는 일련의 모든 과정이 우리의 시스템 하나로 알맞고 빠르고 편하게 이루어지도록 하는 것이 궁극적인 목표이다.
-- 프로젝트 소개
- - 프로젝트 설치방법 및 데모, 사용방법, 프리뷰등을 readme.md에 작성.
- - Api나 사용방법등 내용이 많을경우 wiki에 꾸미고 링크 추가.
+## 2. 소개영상
-- 팀페이지 꾸미기
- - 프로젝트 소개 및 팀원 소개
- - index.md 예시보고 수정.
-
-- GitHub Pages 리파지토리 Settings > Options > GitHub Pages
- - Source를 marster branch
- - Theme Chooser에서 태마선택
- - 수정후 팀페이지 확인하여 점검.
-
-**팀페이지 주소** -> https://kookmin-sw.github.io/ '{{자신의 리파지토리 아이디}}'
-
-**예시)** 2023년 0조 https://kookmin-sw.github.io/capstone-2023-00/
-
-
-## 내용에 아래와 같은 내용들을 추가하세요.
-
-### 1. 프로잭트 소개
-
-프로젝트
-
-### 2. 소개 영상
-
-프로젝트 소개하는 영상을 추가하세요
-
-### 3. 팀 소개
-
-팀을 소개하세요.
-
-팀원정보 및 담당이나 사진 및 SNS를 이용하여 소개하세요.
-
-### 4. 사용법
-
-소스코드제출시 설치법이나 사용법을 작성하세요.
-
-### 5. 기타
-
-추가적인 내용은 자유롭게 작성하세요.
-
-
-## Markdown을 사용하여 내용꾸미기
-
-Markdown은 작문을 스타일링하기위한 가볍고 사용하기 쉬운 구문입니다. 여기에는 다음을위한 규칙이 포함됩니다.
+## 3. 팀 소개
+
\ No newline at end of file
diff --git a/src/views/about.ejs b/src/views/about.ejs
new file mode 100644
index 0000000000..6482488a84
--- /dev/null
+++ b/src/views/about.ejs
@@ -0,0 +1,55 @@
+
+
정산
+
+
+
+
+
+
+
+
현장명
+
주소
+
소재지
+
급여
+
업무
+
날짜
+
Action
+
+
+
+
+ <% worksites.forEach(worksite => { %>
+
+
<%= worksite.name %>
+
<%= worksite.address %>
+
<%= worksite.local %>
+
<%= worksite.salary %>
+
<%= worksite.worktype %>
+
<%= moment(worksite.date).format("YYYY년 MMMM Do a hh:mm") %>
\ No newline at end of file
diff --git a/src/views/dashboard/add.ejs b/src/views/dashboard/add.ejs
new file mode 100644
index 0000000000..794abe2c12
--- /dev/null
+++ b/src/views/dashboard/add.ejs
@@ -0,0 +1,33 @@
+
+
+
+
+
+
+
Add Note
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/views/dashboard/index.ejs b/src/views/dashboard/index.ejs
new file mode 100644
index 0000000000..1912ed32f6
--- /dev/null
+++ b/src/views/dashboard/index.ejs
@@ -0,0 +1,69 @@
+
\ No newline at end of file
diff --git a/src/views/dashboard/search.ejs b/src/views/dashboard/search.ejs
new file mode 100644
index 0000000000..b3d4044b1c
--- /dev/null
+++ b/src/views/dashboard/search.ejs
@@ -0,0 +1,18 @@
+
diff --git a/src/views/dashboard/view-note.ejs b/src/views/dashboard/view-note.ejs
new file mode 100644
index 0000000000..2dd5374013
--- /dev/null
+++ b/src/views/dashboard/view-note.ejs
@@ -0,0 +1,65 @@
+
+
+
+
+
+
+
View Note
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
You are about to delete a note
+
+
+
+
+ This will delete your note <%= note.title %>
+ Are you sure?
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/views/employee/addemployee.ejs b/src/views/employee/addemployee.ejs
new file mode 100644
index 0000000000..d65b592bef
--- /dev/null
+++ b/src/views/employee/addemployee.ejs
@@ -0,0 +1,76 @@
+
+
작업자
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/views/employee/editemployee.ejs b/src/views/employee/editemployee.ejs
new file mode 100644
index 0000000000..1c8e43ffe0
--- /dev/null
+++ b/src/views/employee/editemployee.ejs
@@ -0,0 +1,105 @@
+
+
작업자 정보 수정 <%= employee.name %>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 마지막 업데이트: <%= new Date(employee.updatedAt).toUTCString() %>
+ UserId: <%= employee._id %>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
You are about to remove an employee record.
+
+
+
+
+ 이는 작업자의 기록을 제거할 것 입니다 <%= employee.name %>
+ 확실합니까?
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/views/employee/employee.ejs b/src/views/employee/employee.ejs
new file mode 100644
index 0000000000..c9937a0847
--- /dev/null
+++ b/src/views/employee/employee.ejs
@@ -0,0 +1,103 @@
+
+
+
\ No newline at end of file
diff --git a/src/views/index.ejs b/src/views/index.ejs
new file mode 100644
index 0000000000..f14fef6fb6
--- /dev/null
+++ b/src/views/index.ejs
@@ -0,0 +1,17 @@
+
\ No newline at end of file
diff --git a/src/views/partials/header_dashboard.ejs b/src/views/partials/header_dashboard.ejs
new file mode 100644
index 0000000000..bfe8b29cf5
--- /dev/null
+++ b/src/views/partials/header_dashboard.ejs
@@ -0,0 +1,21 @@
+
+
+
+
\ No newline at end of file
diff --git a/src/views/worksite/addworksite.ejs b/src/views/worksite/addworksite.ejs
new file mode 100644
index 0000000000..0e8ba18f06
--- /dev/null
+++ b/src/views/worksite/addworksite.ejs
@@ -0,0 +1,110 @@
+
+
작업현장 추가
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/views/worksite/editWorksite.ejs b/src/views/worksite/editWorksite.ejs
new file mode 100644
index 0000000000..57ddbcfa0e
--- /dev/null
+++ b/src/views/worksite/editWorksite.ejs
@@ -0,0 +1,108 @@
+
+
<%= worksite.name %>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/views/worksite/matchToWorksite.ejs b/src/views/worksite/matchToWorksite.ejs
new file mode 100644
index 0000000000..dcc9dc1f69
--- /dev/null
+++ b/src/views/worksite/matchToWorksite.ejs
@@ -0,0 +1,181 @@
+<% employees.forEach(employee => { %>
+
+
+
+
+
+
You are about to hire an employee.
+
+
+
+
+ <%= employee.name %> 인원을 등록하시겠습니까?
+
+
+
+
+
+
+
+<% }); %>
+
+
+
+
<%= worksite.name %>
+
+
+
+
+
+
+
+
+
+
+ 마지막 업데이트: <%= moment(worksite.updatedAt).format("YYYY년 MMMM Do (dd) a hh:mm") %> (<%= moment(worksite.updatedAt).fromNow() %>)
+ UserId: <%= worksite._id %>
+
+
+
+
+
+
+
+
+
현장명
+
<%= worksite.name %>
+
+
+
+
+
+
주소
+
<%= worksite.address %>
+
+
+
+
+
+
날짜
+
<%= moment(worksite.date).format("YYYY년 MMMM Do (dd) a hh:mm")%>
+
+
+
+
+
+
+
+
업무
+
<%= worksite.worktype %>
+
+
+
+
+
+
근무시간
+
<%= moment(worksite.end).diff(worksite.date, 'hours')%> 시간