From d56e0253fde429f66607e25122f1c08baefc8294 Mon Sep 17 00:00:00 2001 From: Ahmed Elnaggar Date: Wed, 17 Mar 2021 19:21:54 +0100 Subject: [PATCH] Created using Colaboratory --- .../Advanced/ProtT5-XL-UniRef50.ipynb | 1746 +++++++++++++++++ 1 file changed, 1746 insertions(+) create mode 100644 Embedding/TensorFlow/Advanced/ProtT5-XL-UniRef50.ipynb diff --git a/Embedding/TensorFlow/Advanced/ProtT5-XL-UniRef50.ipynb b/Embedding/TensorFlow/Advanced/ProtT5-XL-UniRef50.ipynb new file mode 100644 index 0000000..aeae75c --- /dev/null +++ b/Embedding/TensorFlow/Advanced/ProtT5-XL-UniRef50.ipynb @@ -0,0 +1,1746 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "ProtT5-XL-BFD.ipynb", + "provenance": [], + "machine_shape": "hm", + "authorship_tag": "ABX9TyOLa8XPe3UQlXB979u8ejsk", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "625c498af5e54243a74d0e1442d76448": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_d5348e015a184d4db05dc48e1581ef44", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_856432cdffae474d8e8c6fafd79a3937", + "IPY_MODEL_a8b5fc22711745f99e61bed9e834872e" + ] + } + }, + "d5348e015a184d4db05dc48e1581ef44": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "856432cdffae474d8e8c6fafd79a3937": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_ff459b2aa7114caca0d1ef25bf6e1790", + "_dom_classes": [], + "description": "Downloading: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 237990, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 237990, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_e2a70fb106874689be06dacfa4d6d306" + } + }, + "a8b5fc22711745f99e61bed9e834872e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_763e98c23a574dc2bf451adbbc58a9a4", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 238k/238k [00:00<00:00, 250kB/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_ad3d15cf7b07413588c5f3d80288288a" + } + }, + "ff459b2aa7114caca0d1ef25bf6e1790": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "e2a70fb106874689be06dacfa4d6d306": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "763e98c23a574dc2bf451adbbc58a9a4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "ad3d15cf7b07413588c5f3d80288288a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "b21c919fb3944acf815c975826688eca": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_01b49e8c145b4a8bae89d8430e3fbdc6", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_d3e4ec9dec5a471881c66ea6f32a142b", + "IPY_MODEL_a943933f18a64c6d96a0248a5ce9bfd6" + ] + } + }, + "01b49e8c145b4a8bae89d8430e3fbdc6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "d3e4ec9dec5a471881c66ea6f32a142b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_7e2c2223c67042f9ba2a1b62ac908f18", + "_dom_classes": [], + "description": "Downloading: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 1786, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1786, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_e91f71f474b74f39a5fb1155741a8d74" + } + }, + "a943933f18a64c6d96a0248a5ce9bfd6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_a79c843eaaeb4df8a46f7c077a09899f", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 1.79k/1.79k [00:01<00:00, 1.27kB/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_662dd8a497d64ee8b310197e72d66977" + } + }, + "7e2c2223c67042f9ba2a1b62ac908f18": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "e91f71f474b74f39a5fb1155741a8d74": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "a79c843eaaeb4df8a46f7c077a09899f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "662dd8a497d64ee8b310197e72d66977": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "650ed09b93e84503a78917000039254a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_947c7ac18bae4d72a942fe114df012d9", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_06c281c8f6b54433beeee404a0cdcc69", + "IPY_MODEL_32856cc01ae248b98dc4527034ae3398" + ] + } + }, + "947c7ac18bae4d72a942fe114df012d9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "06c281c8f6b54433beeee404a0cdcc69": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_98ef6cdfca424b74b0f8cbc51630e0a8", + "_dom_classes": [], + "description": "Downloading: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 24, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 24, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_88ab217893134f6997a317264c676064" + } + }, + "32856cc01ae248b98dc4527034ae3398": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_203fa60c4f4a49a38b9aaab941c863e9", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 24.0/24.0 [00:00<00:00, 24.8B/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_255336e889924999b3378102cc007c95" + } + }, + "98ef6cdfca424b74b0f8cbc51630e0a8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "88ab217893134f6997a317264c676064": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "203fa60c4f4a49a38b9aaab941c863e9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "255336e889924999b3378102cc007c95": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "6039c46157134a998e9e2b1ccec25b63": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_71af1636b5f840a2a07324812235db10", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_8b06d496d4704beebea534ddb5ad2330", + "IPY_MODEL_e699ece0feca440387d082c2b233b58d" + ] + } + }, + "71af1636b5f840a2a07324812235db10": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "8b06d496d4704beebea534ddb5ad2330": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_b056a2433f154dbaa41f6840015ecf87", + "_dom_classes": [], + "description": "Downloading: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 546, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 546, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_19b60494fe2443698785e34808f2027e" + } + }, + "e699ece0feca440387d082c2b233b58d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_7a63fb9f3a2d4fc3b74c85fb7fcbcf9f", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 546/546 [00:00<00:00, 1.71kB/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_039fedc16a9e417c94beea59f5561d1a" + } + }, + "b056a2433f154dbaa41f6840015ecf87": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "19b60494fe2443698785e34808f2027e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "7a63fb9f3a2d4fc3b74c85fb7fcbcf9f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "039fedc16a9e417c94beea59f5561d1a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "1a4ffa8252bb4b5584ad7bdd581cb6da": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_ebac9c962e824d7794c40ac4d0332b89", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_982e1e391da849d992f28a7c8ea473fb", + "IPY_MODEL_5494f8dece5c4a10b5f6af426a760067" + ] + } + }, + "ebac9c962e824d7794c40ac4d0332b89": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "982e1e391da849d992f28a7c8ea473fb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_d885ef1286954116aabbcdca34303124", + "_dom_classes": [], + "description": "Downloading: 38%", + "_model_name": "FloatProgressModel", + "bar_style": "", + "max": 11275562628, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 7441550336, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_8cde50d61e224d50aaffb55ad8323680" + } + }, + "5494f8dece5c4a10b5f6af426a760067": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_04585a7bab58488990342e4081dfdd51", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 7.44G/11.3G [03:23<01:35, 40.2MB/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_b7dfd3ea823c46a9b65bd8cc7bad793d" + } + }, + "d885ef1286954116aabbcdca34303124": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "8cde50d61e224d50aaffb55ad8323680": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "04585a7bab58488990342e4081dfdd51": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "b7dfd3ea823c46a9b65bd8cc7bad793d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + } + } + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ts8Kh2P1U2Th" + }, + "source": [ + "##Important Notes:\n", + "1. ProtT5-XL-UniRef50 has both encoder and decoder, for feature extraction we only load and use the encoder part.\n", + "2. Loading only the encoder part, reduces the inference speed and the GPU memory requirements by half.\n", + "3. In order to use ProtT5-XL-UniRef50 encoder, you must install the latest huggingface transformers version from their GitHub repo.\n", + "4. If you are intersted in both the encoder and decoder, you should use TFT5Model rather than TFT5EncoderModel." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LQcCEtrC6Udr" + }, + "source": [ + "

Extracting protein sequences' features using ProtT5-XL-UniRef50 pretrained-model

" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9wCEAM9F5wTA" + }, + "source": [ + "\n", + "**1. Load necessry libraries including huggingface transformers**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "GXAKFATm-mbs", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "d1f0cf69-ba5e-421f-e19d-ec7c8e58932f" + }, + "source": [ + "!pip install -q SentencePiece transformers" + ], + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "text": [ + "\u001b[K |████████████████████████████████| 1.2MB 7.6MB/s \n", + "\u001b[K |████████████████████████████████| 2.1MB 53.7MB/s \n", + "\u001b[K |████████████████████████████████| 3.2MB 55.0MB/s \n", + "\u001b[K |████████████████████████████████| 890kB 59.1MB/s \n", + "\u001b[?25h Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "0UhF4ih050mu" + }, + "source": [ + "from transformers import TFT5EncoderModel, T5Tokenizer\n", + "import numpy as np\n", + "import re\n", + "import gc" + ], + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hAKCMu_2-h2V" + }, + "source": [ + "2. Load the vocabulary and ProtT5-XL-UniRef50 Model" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HS8i5sOJ-h2W", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 164, + "referenced_widgets": [ + "625c498af5e54243a74d0e1442d76448", + "d5348e015a184d4db05dc48e1581ef44", + "856432cdffae474d8e8c6fafd79a3937", + "a8b5fc22711745f99e61bed9e834872e", + "ff459b2aa7114caca0d1ef25bf6e1790", + "e2a70fb106874689be06dacfa4d6d306", + "763e98c23a574dc2bf451adbbc58a9a4", + "ad3d15cf7b07413588c5f3d80288288a", + "b21c919fb3944acf815c975826688eca", + "01b49e8c145b4a8bae89d8430e3fbdc6", + "d3e4ec9dec5a471881c66ea6f32a142b", + "a943933f18a64c6d96a0248a5ce9bfd6", + "7e2c2223c67042f9ba2a1b62ac908f18", + "e91f71f474b74f39a5fb1155741a8d74", + "a79c843eaaeb4df8a46f7c077a09899f", + "662dd8a497d64ee8b310197e72d66977", + "650ed09b93e84503a78917000039254a", + "947c7ac18bae4d72a942fe114df012d9", + "06c281c8f6b54433beeee404a0cdcc69", + "32856cc01ae248b98dc4527034ae3398", + "98ef6cdfca424b74b0f8cbc51630e0a8", + "88ab217893134f6997a317264c676064", + "203fa60c4f4a49a38b9aaab941c863e9", + "255336e889924999b3378102cc007c95" + ] + }, + "outputId": "9a4e394b-ea05-434e-cc8b-b762f16287f8" + }, + "source": [ + "tokenizer = T5Tokenizer.from_pretrained(\"Rostlab/prot_t5_xl_uniref50\", do_lower_case=False )" + ], + "execution_count": 3, + "outputs": [ + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "625c498af5e54243a74d0e1442d76448", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=237990.0, style=ProgressStyle(descripti…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b21c919fb3944acf815c975826688eca", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1786.0, style=ProgressStyle(description…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "650ed09b93e84503a78917000039254a", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=24.0, style=ProgressStyle(description_w…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ERtkR05t-h2c", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 220, + "referenced_widgets": [ + "6039c46157134a998e9e2b1ccec25b63", + "71af1636b5f840a2a07324812235db10", + "8b06d496d4704beebea534ddb5ad2330", + "e699ece0feca440387d082c2b233b58d", + "b056a2433f154dbaa41f6840015ecf87", + "19b60494fe2443698785e34808f2027e", + "7a63fb9f3a2d4fc3b74c85fb7fcbcf9f", + "039fedc16a9e417c94beea59f5561d1a", + "1a4ffa8252bb4b5584ad7bdd581cb6da", + "ebac9c962e824d7794c40ac4d0332b89", + "982e1e391da849d992f28a7c8ea473fb", + "5494f8dece5c4a10b5f6af426a760067", + "d885ef1286954116aabbcdca34303124", + "8cde50d61e224d50aaffb55ad8323680", + "04585a7bab58488990342e4081dfdd51", + "b7dfd3ea823c46a9b65bd8cc7bad793d" + ] + }, + "outputId": "4c051838-eb47-430a-a738-3adfecdfd7a5" + }, + "source": [ + "model = TFT5EncoderModel.from_pretrained(\"Rostlab/prot_t5_xl_uniref50\", from_pt=True)" + ], + "execution_count": 4, + "outputs": [ + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "6039c46157134a998e9e2b1ccec25b63", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=546.0, style=ProgressStyle(description_…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1a4ffa8252bb4b5584ad7bdd581cb6da", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=11275562628.0, style=ProgressStyle(desc…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stdout" + }, + { + "output_type": "stream", + "text": [ + "Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFT5EncoderModel: ['decoder.block.12.layer.0.layer_norm.weight', 'decoder.block.12.layer.1.EncDecAttention.o.weight', 'decoder.block.4.layer.0.SelfAttention.q.weight', 'decoder.block.13.layer.1.EncDecAttention.v.weight', 'decoder.block.14.layer.0.SelfAttention.q.weight', 'decoder.block.10.layer.1.EncDecAttention.q.weight', 'decoder.block.19.layer.1.layer_norm.weight', 'decoder.block.18.layer.2.DenseReluDense.wi.weight', 'decoder.block.23.layer.2.DenseReluDense.wo.weight', 'decoder.block.5.layer.1.EncDecAttention.q.weight', 'lm_head.weight', 'decoder.block.20.layer.0.SelfAttention.o.weight', 'decoder.block.2.layer.1.EncDecAttention.v.weight', 'decoder.block.4.layer.0.SelfAttention.v.weight', 'decoder.block.15.layer.0.SelfAttention.k.weight', 'decoder.block.17.layer.1.layer_norm.weight', 'decoder.block.2.layer.1.layer_norm.weight', 'decoder.block.13.layer.1.layer_norm.weight', 'decoder.block.16.layer.2.DenseReluDense.wo.weight', 'decoder.block.3.layer.2.DenseReluDense.wi.weight', 'decoder.block.9.layer.1.layer_norm.weight', 'decoder.block.8.layer.1.EncDecAttention.v.weight', 'decoder.block.5.layer.1.EncDecAttention.v.weight', 'decoder.block.17.layer.2.layer_norm.weight', 'decoder.block.13.layer.2.DenseReluDense.wi.weight', 'decoder.block.20.layer.1.EncDecAttention.v.weight', 'decoder.block.0.layer.2.DenseReluDense.wi.weight', 'decoder.block.13.layer.0.SelfAttention.k.weight', 'decoder.block.19.layer.0.SelfAttention.v.weight', 'decoder.block.23.layer.1.EncDecAttention.k.weight', 'decoder.block.2.layer.1.EncDecAttention.q.weight', 'decoder.block.17.layer.0.SelfAttention.q.weight', 'decoder.block.10.layer.2.layer_norm.weight', 'decoder.block.5.layer.2.layer_norm.weight', 'decoder.block.18.layer.1.EncDecAttention.k.weight', 'decoder.block.18.layer.1.layer_norm.weight', 'decoder.block.1.layer.0.SelfAttention.k.weight', 'decoder.block.23.layer.0.SelfAttention.o.weight', 'decoder.block.7.layer.2.layer_norm.weight', 'decoder.block.4.layer.1.EncDecAttention.v.weight', 'decoder.block.3.layer.1.EncDecAttention.o.weight', 'decoder.block.7.layer.1.layer_norm.weight', 'decoder.block.20.layer.1.EncDecAttention.k.weight', 'decoder.block.16.layer.0.SelfAttention.q.weight', 'decoder.block.8.layer.0.SelfAttention.q.weight', 'decoder.block.1.layer.0.SelfAttention.v.weight', 'decoder.block.1.layer.0.layer_norm.weight', 'decoder.block.0.layer.1.EncDecAttention.q.weight', 'decoder.block.12.layer.0.SelfAttention.q.weight', 'decoder.block.17.layer.0.SelfAttention.k.weight', 'decoder.block.1.layer.2.DenseReluDense.wi.weight', 'decoder.block.11.layer.1.EncDecAttention.k.weight', 'decoder.block.16.layer.0.SelfAttention.o.weight', 'decoder.block.21.layer.0.SelfAttention.v.weight', 'decoder.block.3.layer.0.layer_norm.weight', 'decoder.block.14.layer.1.EncDecAttention.v.weight', 'decoder.block.15.layer.1.EncDecAttention.o.weight', 'decoder.block.3.layer.1.layer_norm.weight', 'decoder.block.19.layer.2.DenseReluDense.wi.weight', 'decoder.block.1.layer.1.EncDecAttention.q.weight', 'decoder.block.9.layer.0.layer_norm.weight', 'decoder.block.7.layer.0.SelfAttention.k.weight', 'decoder.block.11.layer.0.SelfAttention.k.weight', 'decoder.block.22.layer.0.SelfAttention.o.weight', 'decoder.block.22.layer.2.DenseReluDense.wo.weight', 'decoder.block.23.layer.0.SelfAttention.q.weight', 'decoder.block.7.layer.1.EncDecAttention.o.weight', 'decoder.block.9.layer.1.EncDecAttention.v.weight', 'decoder.block.18.layer.2.DenseReluDense.wo.weight', 'decoder.block.21.layer.0.layer_norm.weight', 'decoder.block.16.layer.0.layer_norm.weight', 'decoder.block.20.layer.0.SelfAttention.v.weight', 'decoder.block.22.layer.0.SelfAttention.q.weight', 'decoder.block.1.layer.1.EncDecAttention.o.weight', 'decoder.block.15.layer.0.SelfAttention.q.weight', 'decoder.block.9.layer.0.SelfAttention.k.weight', 'decoder.block.2.layer.1.EncDecAttention.k.weight', 'decoder.block.1.layer.0.SelfAttention.o.weight', 'decoder.block.17.layer.2.DenseReluDense.wo.weight', 'decoder.block.20.layer.1.EncDecAttention.o.weight', 'decoder.block.11.layer.1.layer_norm.weight', 'decoder.block.14.layer.0.SelfAttention.o.weight', 'decoder.block.16.layer.1.EncDecAttention.v.weight', 'decoder.block.4.layer.2.DenseReluDense.wo.weight', 'decoder.block.4.layer.1.layer_norm.weight', 'decoder.block.20.layer.1.layer_norm.weight', 'decoder.block.13.layer.1.EncDecAttention.o.weight', 'decoder.block.23.layer.0.SelfAttention.k.weight', 'decoder.block.14.layer.0.SelfAttention.k.weight', 'decoder.block.6.layer.2.DenseReluDense.wi.weight', 'decoder.block.8.layer.0.SelfAttention.v.weight', 'decoder.block.5.layer.1.EncDecAttention.o.weight', 'decoder.block.7.layer.1.EncDecAttention.q.weight', 'decoder.block.12.layer.2.layer_norm.weight', 'decoder.block.23.layer.1.EncDecAttention.o.weight', 'decoder.block.2.layer.0.SelfAttention.v.weight', 'decoder.block.6.layer.1.EncDecAttention.q.weight', 'decoder.block.6.layer.1.EncDecAttention.o.weight', 'decoder.block.22.layer.2.layer_norm.weight', 'decoder.block.9.layer.0.SelfAttention.v.weight', 'decoder.block.20.layer.1.EncDecAttention.q.weight', 'decoder.block.21.layer.0.SelfAttention.k.weight', 'decoder.block.17.layer.0.layer_norm.weight', 'decoder.block.11.layer.1.EncDecAttention.v.weight', 'decoder.block.4.layer.0.SelfAttention.k.weight', 'decoder.block.18.layer.1.EncDecAttention.q.weight', 'decoder.block.19.layer.2.layer_norm.weight', 'decoder.block.15.layer.0.layer_norm.weight', 'decoder.block.6.layer.2.layer_norm.weight', 'decoder.block.8.layer.0.SelfAttention.o.weight', 'decoder.block.17.layer.1.EncDecAttention.q.weight', 'decoder.block.5.layer.0.SelfAttention.v.weight', 'decoder.block.8.layer.0.SelfAttention.k.weight', 'decoder.block.2.layer.0.SelfAttention.q.weight', 'decoder.block.21.layer.2.layer_norm.weight', 'decoder.block.18.layer.0.SelfAttention.k.weight', 'decoder.block.18.layer.0.SelfAttention.v.weight', 'decoder.block.13.layer.0.layer_norm.weight', 'decoder.block.22.layer.1.EncDecAttention.q.weight', 'decoder.block.23.layer.1.layer_norm.weight', 'decoder.block.2.layer.0.SelfAttention.k.weight', 'decoder.block.5.layer.0.SelfAttention.k.weight', 'decoder.block.13.layer.0.SelfAttention.o.weight', 'decoder.block.10.layer.2.DenseReluDense.wo.weight', 'decoder.block.21.layer.0.SelfAttention.q.weight', 'decoder.block.19.layer.1.EncDecAttention.q.weight', 'decoder.block.0.layer.0.SelfAttention.o.weight', 'encoder.embed_tokens.weight', 'decoder.block.5.layer.2.DenseReluDense.wi.weight', 'decoder.block.13.layer.2.DenseReluDense.wo.weight', 'decoder.block.14.layer.0.layer_norm.weight', 'decoder.block.19.layer.0.SelfAttention.q.weight', 'decoder.block.4.layer.1.EncDecAttention.k.weight', 'decoder.block.1.layer.1.EncDecAttention.v.weight', 'decoder.block.23.layer.1.EncDecAttention.v.weight', 'decoder.block.10.layer.2.DenseReluDense.wi.weight', 'decoder.block.23.layer.0.layer_norm.weight', 'decoder.block.23.layer.1.EncDecAttention.q.weight', 'decoder.block.21.layer.1.EncDecAttention.q.weight', 'decoder.block.11.layer.1.EncDecAttention.q.weight', 'decoder.block.5.layer.1.EncDecAttention.k.weight', 'decoder.block.23.layer.0.SelfAttention.v.weight', 'decoder.block.13.layer.0.SelfAttention.q.weight', 'decoder.block.2.layer.2.DenseReluDense.wi.weight', 'decoder.block.12.layer.1.EncDecAttention.q.weight', 'decoder.block.0.layer.2.DenseReluDense.wo.weight', 'decoder.block.9.layer.2.layer_norm.weight', 'decoder.block.11.layer.2.DenseReluDense.wo.weight', 'decoder.block.22.layer.1.layer_norm.weight', 'decoder.block.1.layer.0.SelfAttention.q.weight', 'decoder.block.20.layer.0.SelfAttention.k.weight', 'decoder.block.13.layer.0.SelfAttention.v.weight', 'decoder.block.17.layer.1.EncDecAttention.o.weight', 'decoder.block.10.layer.0.layer_norm.weight', 'decoder.block.10.layer.0.SelfAttention.v.weight', 'decoder.block.14.layer.1.EncDecAttention.o.weight', 'decoder.block.6.layer.2.DenseReluDense.wo.weight', 'decoder.block.14.layer.1.EncDecAttention.q.weight', 'decoder.block.2.layer.0.layer_norm.weight', 'decoder.block.2.layer.1.EncDecAttention.o.weight', 'decoder.block.12.layer.2.DenseReluDense.wi.weight', 'decoder.block.4.layer.0.layer_norm.weight', 'decoder.block.11.layer.2.layer_norm.weight', 'decoder.block.17.layer.1.EncDecAttention.k.weight', 'decoder.block.20.layer.0.layer_norm.weight', 'decoder.block.0.layer.1.EncDecAttention.k.weight', 'decoder.block.12.layer.0.SelfAttention.o.weight', 'decoder.block.21.layer.1.EncDecAttention.o.weight', 'decoder.block.20.layer.2.layer_norm.weight', 'decoder.block.7.layer.0.SelfAttention.o.weight', 'decoder.block.12.layer.1.EncDecAttention.k.weight', 'decoder.block.9.layer.0.SelfAttention.o.weight', 'decoder.block.15.layer.1.EncDecAttention.k.weight', 'decoder.block.16.layer.2.DenseReluDense.wi.weight', 'decoder.block.18.layer.0.SelfAttention.o.weight', 'decoder.block.20.layer.2.DenseReluDense.wi.weight', 'decoder.block.3.layer.1.EncDecAttention.v.weight', 'decoder.block.6.layer.0.layer_norm.weight', 'decoder.block.16.layer.1.EncDecAttention.q.weight', 'decoder.block.7.layer.2.DenseReluDense.wo.weight', 'decoder.block.2.layer.2.layer_norm.weight', 'decoder.block.22.layer.1.EncDecAttention.v.weight', 'decoder.block.9.layer.2.DenseReluDense.wo.weight', 'decoder.block.20.layer.0.SelfAttention.q.weight', 'decoder.block.23.layer.2.DenseReluDense.wi.weight', 'decoder.block.15.layer.2.DenseReluDense.wi.weight', 'decoder.block.0.layer.2.layer_norm.weight', 'decoder.block.5.layer.0.SelfAttention.q.weight', 'decoder.block.0.layer.1.layer_norm.weight', 'decoder.block.4.layer.2.DenseReluDense.wi.weight', 'decoder.block.8.layer.1.EncDecAttention.q.weight', 'decoder.block.16.layer.0.SelfAttention.v.weight', 'decoder.block.22.layer.2.DenseReluDense.wi.weight', 'decoder.block.5.layer.0.layer_norm.weight', 'decoder.block.19.layer.0.SelfAttention.k.weight', 'decoder.block.7.layer.2.DenseReluDense.wi.weight', 'decoder.block.7.layer.0.SelfAttention.v.weight', 'decoder.block.22.layer.0.layer_norm.weight', 'decoder.final_layer_norm.weight', 'decoder.block.8.layer.1.EncDecAttention.k.weight', 'decoder.block.15.layer.1.EncDecAttention.v.weight', 'decoder.block.17.layer.2.DenseReluDense.wi.weight', 'decoder.block.6.layer.0.SelfAttention.k.weight', 'decoder.block.14.layer.1.layer_norm.weight', 'decoder.block.11.layer.0.SelfAttention.q.weight', 'decoder.block.10.layer.1.layer_norm.weight', 'decoder.block.22.layer.0.SelfAttention.k.weight', 'decoder.block.1.layer.2.layer_norm.weight', 'decoder.block.2.layer.0.SelfAttention.o.weight', 'decoder.block.21.layer.1.layer_norm.weight', 'decoder.block.15.layer.1.EncDecAttention.q.weight', 'decoder.block.18.layer.0.layer_norm.weight', 'decoder.block.6.layer.0.SelfAttention.o.weight', 'decoder.block.4.layer.2.layer_norm.weight', 'decoder.block.12.layer.0.SelfAttention.k.weight', 'decoder.block.11.layer.0.layer_norm.weight', 'decoder.block.18.layer.1.EncDecAttention.o.weight', 'decoder.block.11.layer.2.DenseReluDense.wi.weight', 'decoder.block.14.layer.2.layer_norm.weight', 'decoder.block.21.layer.2.DenseReluDense.wi.weight', 'decoder.block.13.layer.1.EncDecAttention.q.weight', 'decoder.block.17.layer.0.SelfAttention.v.weight', 'decoder.block.18.layer.2.layer_norm.weight', 'decoder.block.9.layer.1.EncDecAttention.q.weight', 'decoder.block.17.layer.1.EncDecAttention.v.weight', 'decoder.block.2.layer.2.DenseReluDense.wo.weight', 'decoder.block.4.layer.1.EncDecAttention.q.weight', 'decoder.block.0.layer.0.SelfAttention.k.weight', 'decoder.block.21.layer.1.EncDecAttention.k.weight', 'decoder.block.8.layer.0.layer_norm.weight', 'decoder.block.19.layer.0.layer_norm.weight', 'decoder.block.12.layer.1.layer_norm.weight', 'decoder.block.5.layer.0.SelfAttention.o.weight', 'decoder.block.10.layer.1.EncDecAttention.k.weight', 'decoder.block.22.layer.0.SelfAttention.v.weight', 'decoder.block.19.layer.1.EncDecAttention.v.weight', 'decoder.block.11.layer.0.SelfAttention.o.weight', 'decoder.block.21.layer.0.SelfAttention.o.weight', 'decoder.block.10.layer.0.SelfAttention.o.weight', 'decoder.block.14.layer.1.EncDecAttention.k.weight', 'decoder.block.16.layer.1.EncDecAttention.o.weight', 'decoder.block.3.layer.1.EncDecAttention.k.weight', 'decoder.block.9.layer.2.DenseReluDense.wi.weight', 'decoder.block.3.layer.0.SelfAttention.o.weight', 'decoder.block.17.layer.0.SelfAttention.o.weight', 'decoder.block.19.layer.1.EncDecAttention.k.weight', 'decoder.block.1.layer.2.DenseReluDense.wo.weight', 'decoder.block.15.layer.0.SelfAttention.v.weight', 'decoder.block.16.layer.1.layer_norm.weight', 'decoder.block.0.layer.0.layer_norm.weight', 'decoder.block.10.layer.1.EncDecAttention.v.weight', 'decoder.block.14.layer.2.DenseReluDense.wi.weight', 'decoder.block.3.layer.2.layer_norm.weight', 'decoder.block.3.layer.0.SelfAttention.k.weight', 'decoder.block.6.layer.1.EncDecAttention.v.weight', 'decoder.block.18.layer.1.EncDecAttention.v.weight', 'decoder.block.14.layer.0.SelfAttention.v.weight', 'decoder.block.8.layer.1.layer_norm.weight', 'decoder.block.8.layer.2.layer_norm.weight', 'decoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight', 'decoder.block.11.layer.0.SelfAttention.v.weight', 'decoder.block.4.layer.0.SelfAttention.o.weight', 'decoder.block.12.layer.0.SelfAttention.v.weight', 'decoder.block.9.layer.0.SelfAttention.q.weight', 'decoder.block.19.layer.2.DenseReluDense.wo.weight', 'decoder.block.13.layer.2.layer_norm.weight', 'decoder.block.5.layer.1.layer_norm.weight', 'decoder.block.6.layer.0.SelfAttention.q.weight', 'decoder.block.7.layer.1.EncDecAttention.v.weight', 'decoder.block.16.layer.2.layer_norm.weight', 'decoder.block.11.layer.1.EncDecAttention.o.weight', 'decoder.block.20.layer.2.DenseReluDense.wo.weight', 'decoder.block.10.layer.0.SelfAttention.k.weight', 'decoder.block.0.layer.1.EncDecAttention.v.weight', 'decoder.block.21.layer.2.DenseReluDense.wo.weight', 'decoder.block.0.layer.0.SelfAttention.q.weight', 'decoder.block.6.layer.0.SelfAttention.v.weight', 'decoder.block.6.layer.1.layer_norm.weight', 'decoder.embed_tokens.weight', 'decoder.block.15.layer.2.layer_norm.weight', 'decoder.block.21.layer.1.EncDecAttention.v.weight', 'decoder.block.7.layer.0.layer_norm.weight', 'decoder.block.5.layer.2.DenseReluDense.wo.weight', 'decoder.block.7.layer.1.EncDecAttention.k.weight', 'decoder.block.22.layer.1.EncDecAttention.k.weight', 'decoder.block.23.layer.2.layer_norm.weight', 'decoder.block.19.layer.1.EncDecAttention.o.weight', 'decoder.block.0.layer.1.EncDecAttention.o.weight', 'decoder.block.3.layer.1.EncDecAttention.q.weight', 'decoder.block.9.layer.1.EncDecAttention.k.weight', 'decoder.block.13.layer.1.EncDecAttention.k.weight', 'decoder.block.14.layer.2.DenseReluDense.wo.weight', 'decoder.block.12.layer.2.DenseReluDense.wo.weight', 'decoder.block.1.layer.1.EncDecAttention.k.weight', 'decoder.block.6.layer.1.EncDecAttention.k.weight', 'decoder.block.3.layer.2.DenseReluDense.wo.weight', 'decoder.block.7.layer.0.SelfAttention.q.weight', 'decoder.block.22.layer.1.EncDecAttention.o.weight', 'decoder.block.8.layer.2.DenseReluDense.wi.weight', 'decoder.block.15.layer.2.DenseReluDense.wo.weight', 'decoder.block.15.layer.0.SelfAttention.o.weight', 'decoder.block.10.layer.1.EncDecAttention.o.weight', 'decoder.block.8.layer.2.DenseReluDense.wo.weight', 'decoder.block.18.layer.0.SelfAttention.q.weight', 'decoder.block.8.layer.1.EncDecAttention.o.weight', 'decoder.block.3.layer.0.SelfAttention.v.weight', 'decoder.block.3.layer.0.SelfAttention.q.weight', 'decoder.block.10.layer.0.SelfAttention.q.weight', 'decoder.block.16.layer.0.SelfAttention.k.weight', 'decoder.block.19.layer.0.SelfAttention.o.weight', 'decoder.block.9.layer.1.EncDecAttention.o.weight', 'decoder.block.1.layer.1.layer_norm.weight', 'decoder.block.15.layer.1.layer_norm.weight', 'decoder.block.16.layer.1.EncDecAttention.k.weight', 'decoder.block.12.layer.1.EncDecAttention.v.weight', 'decoder.block.4.layer.1.EncDecAttention.o.weight', 'decoder.block.0.layer.0.SelfAttention.v.weight']\n", + "- This IS expected if you are initializing TFT5EncoderModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing TFT5EncoderModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).\n", + "All the weights of TFT5EncoderModel were initialized from the PyTorch model.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5EncoderModel for predictions without further training.\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "YN6nqLFuY0k2", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "d08b4b48-c046-46be-e080-9aeb6381ceb1" + }, + "source": [ + "gc.collect()" + ], + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "50" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 0 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZkqAotTcdZnW" + }, + "source": [ + "3. Create or load sequences and map rarely occured amino acids (U,Z,O,B) to (X)" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "a0zwKinIdZnX" + }, + "source": [ + "sequences_Example = [\"A E T C Z A O\",\"S K T Z P\"]" + ], + "execution_count": 6, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "EkINwL9DdZna" + }, + "source": [ + "sequences_Example = [re.sub(r\"[UZOB]\", \"X\", sequence) for sequence in sequences_Example]" + ], + "execution_count": 7, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "66BZEB3MdZnf" + }, + "source": [ + "4. Tokenize, encode sequences and load it into the GPU if possibile" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "xt5uYuu7dZnf" + }, + "source": [ + "ids = tokenizer.batch_encode_plus(sequences_Example, add_special_tokens=True, padding=True, return_tensors=\"tf\")" + ], + "execution_count": 8, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "Grl3ieUhdZnj" + }, + "source": [ + "input_ids = ids['input_ids']\n", + "attention_mask = ids['attention_mask']" + ], + "execution_count": 9, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Zylf1HyBdZnl" + }, + "source": [ + "5. Extracting sequences' features and load it into the CPU if needed" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "i8CVGPRFdZnm" + }, + "source": [ + "embedding = model(input_ids)" + ], + "execution_count": 10, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "P8IsEs35T0T4" + }, + "source": [ + "embedding = np.asarray(embedding.last_hidden_state)" + ], + "execution_count": 11, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "7Q2AGxlCUrot" + }, + "source": [ + "attention_mask = np.asarray(attention_mask)" + ], + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "R6oeRZ7xdZns" + }, + "source": [ + "7. Remove padding (\\) and special tokens (\\) that is added by ProtT5-XL-BFD model" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "1XXoVSPDdZns" + }, + "source": [ + "features = [] \n", + "for seq_num in range(len(embedding)):\n", + " seq_len = (attention_mask[seq_num] == 1).sum()\n", + " seq_emd = embedding[seq_num][:seq_len-1]\n", + " features.append(seq_emd)" + ], + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "jiRaUov9eyii", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "c61e9d8d-f352-4f9c-f55d-e8b81e70e27a" + }, + "source": [ + "print(features)" + ], + "execution_count": 14, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[array([[ 0.22006367, -0.10348108, -0.24565518, ..., 0.07009576,\n", + " 0.18895069, -0.12561187],\n", + " [ 0.10839709, -0.14164549, -0.2611331 , ..., 0.0752494 ,\n", + " -0.19057555, 0.14047642],\n", + " [ 0.20587248, -0.06389292, -0.3473575 , ..., -0.025124 ,\n", + " 0.00397164, 0.00148447],\n", + " ...,\n", + " [ 0.4088016 , -0.08833068, -0.12417938, ..., 0.0790412 ,\n", + " 0.47919267, 0.16784595],\n", + " [ 0.09277283, -0.08904049, -0.2286643 , ..., -0.12425306,\n", + " 0.15413284, -0.22367832],\n", + " [ 0.3757485 , -0.05730629, -0.19935629, ..., 0.04669633,\n", + " 0.27533904, -0.10681018]], dtype=float32), array([[ 0.1954189 , -0.10268322, -0.23747759, ..., -0.14768864,\n", + " -0.04564754, -0.00130377],\n", + " [ 0.15905248, -0.12580584, -0.04593242, ..., -0.04285887,\n", + " -0.2786491 , 0.02392592],\n", + " [ 0.335001 , -0.08294542, -0.08190978, ..., -0.02507642,\n", + " 0.08196809, -0.17845191],\n", + " [ 0.23378026, -0.19225256, -0.1894767 , ..., 0.05166554,\n", + " 0.5063019 , -0.05276478],\n", + " [ 0.30331337, 0.06112606, 0.06495828, ..., 0.05243773,\n", + " -0.02636664, -0.00536591]], dtype=float32)]\n" + ], + "name": "stdout" + } + ] + } + ] +} \ No newline at end of file