Created using Colaboratory

jazzar-dev · jazzar-dev · commit c7a07fd332b9 · 2022-08-07T11:55:44.000+03:00
diff --git a/data_and_ai.ipynb b/data_and_ai.ipynb
@@ -0,0 +1,150 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "Untitled15.ipynb",
+      "private_outputs": true,
+      "provenance": [],
+      "collapsed_sections": [],
+      "authorship_tag": "ABX9TyMjyfI1mJ1x53gJIzD7svPA",
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/jazzar-dev/Snippets/blob/main/data_and_ai.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Data class"
+      ],
+      "metadata": {
+        "id": "5nAH5W-EVRT1"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from urllib import request\n",
+        "import pandas as pd\n",
+        "import zipfile\n",
+        "import numpy as np\n",
+        "from http.client import error\n",
+        "import tensorflow as tf\n",
+        "import matplotlib.pyplot as plt\n",
+        "import os\n",
+        "class data:\n",
+        "  def __init__(self,directory=[], is_download = False,is_zip=False,extract_loc='./',text=False,url=None,file_save_name='Temporary_dataset',file_name=[],is_csv=False):\n",
+        "    self.is_download = is_download\n",
+        "    self.extract_loc = extract_loc\n",
+        "    self.text = text\n",
+        "    self.x = []\n",
+        "    self.url = url\n",
+        "    self.file_save_name = file_save_name\n",
+        "    self.file_name = file_name\n",
+        "    self.is_csv = is_csv\n",
+        "    self.is_zip = is_zip\n",
+        "    self.file_load_path = []\n",
+        "    self.directory = directory\n",
+        "    if self.is_download and self.url:\n",
+        "      data.download(self)\n",
+        "    elif text:\n",
+        "      self.load_txt()\n",
+        "    elif is_csv:\n",
+        "      self.load_csv()\n",
+        "    elif self.directory !=[]:\n",
+        "      self.remove_dir()\n",
+        "  def download(self):\n",
+        "    request.urlretrieve(self.url, self.file_save_name)\n",
+        "    if self.is_zip:\n",
+        "      with zipfile.ZipFile(self.file_save_name, 'r') as zip_ref:\n",
+        "        zip_ref.extractall(self.extract_loc)\n",
+        "      os.remove(self.file_save_name)\n",
+        "    try:\n",
+        "      if self.text:\n",
+        "        self.load_txt()\n",
+        "      elif self.is_csv:\n",
+        "        self.load_csv()\n",
+        "    except error:\n",
+        "      return 0\n",
+        "  def load_csv(self):\n",
+        "    if self.file_load_path == []:\n",
+        "      try:\n",
+        "        for root, dirs, files in os.walk(self.extract_loc):\n",
+        "          for name in files:\n",
+        "            for f in self.file_name: \n",
+        "              if name == f:\n",
+        "                self.file_load_path.append(os.path.abspath(os.path.join(root, name)))\n",
+        "      except error:\n",
+        "        return 0\n",
+        "    for i in range(len(self.file_load_path)):\n",
+        "      self.x.append(self.file_load_path[i])\n",
+        "      self.x.append(pd.read_csv(self.file_load_path[i]))\n",
+        "  def load_txt(self):\n",
+        "    if self.file_load_path == []:\n",
+        "      try:\n",
+        "        for root, dirs, files in os.walk(self.extract_loc):\n",
+        "          for name in files:\n",
+        "            for f in self.file_name:\n",
+        "              if name == f:\n",
+        "                self.file_load_path.append(os.path.abspath(os.path.join(root, name)))\n",
+        "        for i in range(len(self.file_load_path)):\n",
+        "          self.x.append(self.file_load_path[i])\n",
+        "          self.x.append(np.loadtxt(self.file_load_path[i]))\n",
+        "      except error:\n",
+        "        return \"error in load txt\"\n",
+        "    \n",
+        "  def remove_dir(self):\n",
+        "    for d in self.directory:\n",
+        "      try:\n",
+        "        shutil.rmtree(d)\n",
+        "      except error:\n",
+        "        pass\n",
+        "    self.directory=[]"
+      ],
+      "metadata": {
+        "id": "zziEwi8jlf6R"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Download extract and load a text"
+      ],
+      "metadata": {
+        "id": "jDpabofXVXv3"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "DCs24HslMhpF"
+      },
+      "outputs": [],
+      "source": [
+        "ar = data(extract_loc='extract_name',is_download=True,is_zip=True,url='https://github.com/jazzar-dev/ai-big-data-course/archive/refs/heads/main.zip')\n",
+        "ar = ar.x\n",
+        "ar"
+      ]
+    }
+  ]
+}