|
188 | 188 | "train_dataset, test_dataset = load_dataset(\"ccdv/pubmed-summarization\", split=[\"train\", \"test\"])\n"
|
189 | 189 | ]
|
190 | 190 | },
|
| 191 | + { |
| 192 | + "cell_type": "markdown", |
| 193 | + "id": "3399abb1-af8f-46ee-92ea-c8344eeddd09", |
| 194 | + "metadata": {}, |
| 195 | + "source": [ |
| 196 | + "## Finetuning our Model Locally" |
| 197 | + ] |
| 198 | + }, |
191 | 199 | {
|
192 | 200 | "cell_type": "markdown",
|
193 | 201 | "id": "ed6ddff1-2636-4e3b-88ee-e3c86c584245",
|
|
210 | 218 | "outputs": [],
|
211 | 219 | "source": [
|
212 | 220 | "from transformers import AutoModelForSeq2SeqLM, AutoTokenizer\n",
|
| 221 | + "model_name=\"google/flan-t5-small\"\n", |
213 | 222 | "\n",
|
214 |
| - "model = AutoModelForSeq2SeqLM.from_pretrained(\"google/flan-t5-small\")\n", |
215 |
| - "tokenizer = AutoTokenizer.from_pretrained(\"google/flan-t5-small\")" |
| 223 | + "model = AutoModelForSeq2SeqLM.from_pretrained(model_name)\n", |
| 224 | + "tokenizer = AutoTokenizer.from_pretrained(model_name)" |
216 | 225 | ]
|
217 | 226 | },
|
218 | 227 | {
|
|
253 | 262 | "test_dataset.set_format(\"torch\", columns=[\"input_ids\", \"attention_mask\", \"abstracts\"])"
|
254 | 263 | ]
|
255 | 264 | },
|
| 265 | + { |
| 266 | + "cell_type": "markdown", |
| 267 | + "id": "b3ffd612-abde-4666-8c85-cc7069de2129", |
| 268 | + "metadata": {}, |
| 269 | + "source": [ |
| 270 | + "The first step to training our model other than setting up our datasets is to set our **hyperparameters**. Hyperparameters depend on your training script and for this one we need to identify our model, the location of our train and test files, etc. iN this case we are using a one created by Hugging Face." |
| 271 | + ] |
| 272 | + }, |
| 273 | + { |
| 274 | + "cell_type": "code", |
| 275 | + "execution_count": null, |
| 276 | + "id": "c06bef19-cc3c-476f-943c-78368e9f49e8", |
| 277 | + "metadata": {}, |
| 278 | + "outputs": [], |
| 279 | + "source": [ |
| 280 | + "from transformers import TrainingArguments\n", |
| 281 | + "\n", |
| 282 | + "training_args = TrainingArguments(output_dir=\"test_trainer\")" |
| 283 | + ] |
| 284 | + }, |
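| | + { |
| | + "cell_type": "markdown", |
| | + "id": "0b9d31c4-5f2e-4a7d-9c1e-2a6f8d4e7b10", |
| | + "metadata": {}, |
| | + "source": [ |
| | + "`output_dir` is the only required argument here. As a minimal sketch, a few other commonly tuned hyperparameters are shown below; the exact values are illustrative assumptions, not recommendations." |
| | + ] |
| | + }, |
| | + { |
| | + "cell_type": "code", |
| | + "execution_count": null, |
| | + "id": "7d3e5a92-1b4c-4f6d-8e0a-9c2b5d7f3a41", |
| | + "metadata": {}, |
| | + "outputs": [], |
| | + "source": [ |
| | + "# a minimal sketch: these values are illustrative assumptions, tune them for your task\n", |
| | + "training_args = TrainingArguments(\n", |
| | + "    output_dir=\"test_trainer\",\n", |
| | + "    learning_rate=2e-5,\n", |
| | + "    per_device_train_batch_size=8,\n", |
| | + "    num_train_epochs=3,\n", |
| | + ")" |
| | + ] |
| | + }, |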
| 285 | + { |
| 286 | + "cell_type": "markdown", |
| 287 | + "id": "cff31d69-9f54-4235-a377-7c5e758fbca8", |
| 288 | + "metadata": {}, |
| 289 | + "source": [ |
| 290 | + "Next create setting to evaluate the models accuracy." |
| 291 | + ] |
| 292 | + }, |
| 293 | + { |
| 294 | + "cell_type": "code", |
| 295 | + "execution_count": null, |
| 296 | + "id": "24bbe62e-9140-4bef-88ae-3e5029ddb25c", |
| 297 | + "metadata": {}, |
| 298 | + "outputs": [], |
| 299 | + "source": [ |
| 300 | + "import numpy as np\n", |
| 301 | + "import evaluate\n", |
| 302 | + "\n", |
| 303 | + "metric = evaluate.load(\"accuracy\")" |
| 304 | + ] |
| 305 | + }, |
| 306 | + { |
| 307 | + "cell_type": "code", |
| 308 | + "execution_count": null, |
| 309 | + "id": "b82caeba-2daa-4526-b67d-04f45d4a9934", |
| 310 | + "metadata": {}, |
| 311 | + "outputs": [], |
| 312 | + "source": [ |
| 313 | + "def compute_metrics(eval_pred):\n", |
| 314 | + " logits, labels = eval_pred\n", |
| 315 | + " predictions = np.argmax(logits, axis=-1)\n", |
| 316 | + " return metric.compute(predictions=predictions, references=labels)" |
| 317 | + ] |
| 318 | + }, |
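| | + { |
| | + "cell_type": "markdown", |
| | + "id": "5e8c2b1a-4d7f-4c3e-9b6a-1f0d8e2c4a57", |
| | + "metadata": {}, |
| | + "source": [ |
| | + "Note that token-level accuracy is a coarse signal for summarization; ROUGE (`evaluate.load(\"rouge\")`) is the more standard metric, but accuracy keeps this example simple." |
| | + ] |
| | + }, |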
| 319 | + { |
| 320 | + "cell_type": "code", |
| 321 | + "execution_count": null, |
| 322 | + "id": "f5b50ec0-87b8-4578-96aa-e26bda9d99b8", |
| 323 | + "metadata": {}, |
| 324 | + "outputs": [], |
| 325 | + "source": [ |
| 326 | + "from transformers import TrainingArguments, Trainer\n", |
| 327 | + "\n", |
| 328 | + "training_args = TrainingArguments(output_dir=\"test_trainer\", evaluation_strategy=\"epoch\")" |
| 329 | + ] |
| 330 | + }, |
| 331 | + { |
| 332 | + "cell_type": "markdown", |
| 333 | + "id": "df2225ac-8e92-4a14-a368-eebff9ead6bf", |
| 334 | + "metadata": {}, |
| 335 | + "source": [ |
| 336 | + "Finally we can train our model!" |
| 337 | + ] |
| 338 | + }, |
| 339 | + { |
| 340 | + "cell_type": "code", |
| 341 | + "execution_count": null, |
| 342 | + "id": "e59332ae-c9e3-4a9b-9a7c-7020c87227da", |
| 343 | + "metadata": {}, |
| 344 | + "outputs": [], |
| 345 | + "source": [ |
| 346 | + "trainer = Trainer(\n", |
| 347 | + " model=model,\n", |
| 348 | + " args=training_args,\n", |
| 349 | + " train_dataset=train_dataset,\n", |
| 350 | + " eval_dataset=test_dataset,\n", |
| 351 | + " compute_metrics=compute_metrics,\n", |
| 352 | + ")" |
| 353 | + ] |
| 354 | + }, |
| 355 | + { |
| 356 | + "cell_type": "code", |
| 357 | + "execution_count": null, |
| 358 | + "id": "f35520bb-b6ca-4996-b87e-2fbfdcfc0dff", |
| 359 | + "metadata": {}, |
| 360 | + "outputs": [], |
| 361 | + "source": [ |
| 362 | + "trainer.train()" |
| 363 | + ] |
| 364 | + }, |
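| | + { |
| | + "cell_type": "markdown", |
| | + "id": "8a4c9e2d-6b1f-4e5a-b3c7-0d9f2a6e8c14", |
| | + "metadata": {}, |
| | + "source": [ |
| | + "With training done, the sketch below saves the finetuned model and generates a single summary with it. The save path, the `summarize:` prompt prefix, and the generation settings are assumptions, not fixed choices." |
| | + ] |
| | + }, |
| | + { |
| | + "cell_type": "code", |
| | + "execution_count": null, |
| | + "id": "2f7b4d81-9c3e-4a6f-8d25-7e1a0b5c9f36", |
| | + "metadata": {}, |
| | + "outputs": [], |
| | + "source": [ |
| | + "# a minimal sketch: the save path, prompt prefix, and generation settings are assumptions\n", |
| | + "trainer.save_model(\"test_trainer/final\")\n", |
| | + "tokenizer.save_pretrained(\"test_trainer/final\")\n", |
| | + "\n", |
| | + "sample = \"summarize: \" + \"<paste an article here>\"\n", |
| | + "inputs = tokenizer(sample, return_tensors=\"pt\", truncation=True, max_length=512).to(model.device)\n", |
| | + "summary_ids = model.generate(**inputs, max_new_tokens=128)\n", |
| | + "print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))" |
| | + ] |
| | + }, |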
256 | 365 | {
|
257 | 366 | "cell_type": "markdown",
|
258 | 367 | "id": "6ac841f6-c65e-4ebf-8c42-3030e2f92cb0",
|
|
342 | 451 | "id": "9204b6dc-8f6e-407e-8c68-a036a6a5b7c9",
|
343 | 452 | "metadata": {},
|
344 | 453 | "source": [
|
345 |
| - "### Training our Model" |
| 454 | + "### Training our ModelFinetuning our Model via Vertex AI Training API" |
346 | 455 | ]
|
347 | 456 | },
|
348 | 457 | {
|
|
634 | 743 | "name": "python",
|
635 | 744 | "nbconvert_exporter": "python",
|
636 | 745 | "pygments_lexer": "ipython3",
|
637 |
| - "version": "3.10.12" |
| 746 | + "version": "3.10.13" |
638 | 747 | }
|
639 | 748 | },
|
640 | 749 | "nbformat": 4,
|
|