|
563 | 563 | "len(dataset.filter(lambda ex: ex[\"ragas_score\"]<=0.8))"
|
564 | 564 | ]
|
565 | 565 | },
|
| 566 | + { |
| 567 | + "cell_type": "markdown", |
| 568 | + "id": "81d872f3-ba83-4d22-af08-4fdeb211f2a5", |
| 569 | + "metadata": {}, |
| 570 | + "source": [ |
| 571 | + "## Mining for hard negatives" |
| 572 | + ] |
| 573 | + }, |
566 | 574 | {
|
567 | 575 | "cell_type": "code",
|
568 | 576 | "execution_count": 28,
|
|
657 | 665 | "len(finetuning_dataset)"
|
658 | 666 | ]
|
659 | 667 | },
|
| 668 | + { |
| 669 | + "cell_type": "markdown", |
| 670 | + "id": "8944460b-e0db-4f7e-b0c3-bdc02c936965", |
| 671 | + "metadata": {}, |
| 672 | + "source": [ |
| 673 | + "## Dataset push to hub" |
| 674 | + ] |
| 675 | + }, |
| 676 | + { |
| 677 | + "cell_type": "code", |
| 678 | + "execution_count": 7, |
| 679 | + "id": "e016b96e-c760-4492-94e3-01be637c5122", |
| 680 | + "metadata": {}, |
| 681 | + "outputs": [], |
| 682 | + "source": [ |
| 683 | + "dataset = Dataset.from_json(\"finetuning_dataset.json\")" |
| 684 | + ] |
| 685 | + }, |
| 686 | + { |
| 687 | + "cell_type": "code", |
| 688 | + "execution_count": 13, |
| 689 | + "id": "8522ccd1-c653-4750-b560-385e99a890e1", |
| 690 | + "metadata": {}, |
| 691 | + "outputs": [], |
| 692 | + "source": [ |
| 693 | + "dataset = dataset.remove_columns([\"hard_negatives\",\"negatives\",\"positives\"])" |
| 694 | + ] |
| 695 | + }, |
| 696 | + { |
| 697 | + "cell_type": "code", |
| 698 | + "execution_count": 19, |
| 699 | + "id": "c78be3e2-8089-4e19-99d0-77542142c67f", |
| 700 | + "metadata": {}, |
| 701 | + "outputs": [ |
| 702 | + { |
| 703 | + "data": { |
| 704 | + "application/vnd.jupyter.widget-view+json": { |
| 705 | + "model_id": "03153dac8c4e400b90d754fba87c7eee", |
| 706 | + "version_major": 2, |
| 707 | + "version_minor": 0 |
| 708 | + }, |
| 709 | + "text/plain": [ |
| 710 | + "Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]" |
| 711 | + ] |
| 712 | + }, |
| 713 | + "metadata": {}, |
| 714 | + "output_type": "display_data" |
| 715 | + }, |
| 716 | + { |
| 717 | + "data": { |
| 718 | + "application/vnd.jupyter.widget-view+json": { |
| 719 | + "model_id": "db77d857ce7c42708fe21265d42b954c", |
| 720 | + "version_major": 2, |
| 721 | + "version_minor": 0 |
| 722 | + }, |
| 723 | + "text/plain": [ |
| 724 | + "Creating parquet from Arrow format: 0%| | 0/2 [00:00<?, ?ba/s]" |
| 725 | + ] |
| 726 | + }, |
| 727 | + "metadata": {}, |
| 728 | + "output_type": "display_data" |
| 729 | + } |
| 730 | + ], |
| 731 | + "source": [ |
| 732 | + "dataset.push_to_hub(\"explodinggradients/qrecc_conversational_embeddings\")" |
| 733 | + ] |
| 734 | + }, |
660 | 735 | {
|
661 | 736 | "cell_type": "code",
|
662 | 737 | "execution_count": null,
|
663 |
| - "id": "7565a404-96d2-40d5-a063-a6e8b3bb9a90", |
| 738 | + "id": "cbbb2670-27db-466b-bf44-6fa6954235f2", |
664 | 739 | "metadata": {},
|
665 | 740 | "outputs": [],
|
666 | 741 | "source": []
|
|
0 commit comments