1
+ {
2
+ "nbformat" : 4 ,
3
+ "nbformat_minor" : 0 ,
4
+ "metadata" : {
5
+ "colab" : {
6
+ "name" : " Untitled15.ipynb" ,
7
+ "private_outputs" : true ,
8
+ "provenance" : [],
9
+ "collapsed_sections" : [],
10
+ "authorship_tag" : " ABX9TyMjyfI1mJ1x53gJIzD7svPA" ,
11
+ "include_colab_link" : true
12
+ },
13
+ "kernelspec" : {
14
+ "name" : " python3" ,
15
+ "display_name" : " Python 3"
16
+ },
17
+ "language_info" : {
18
+ "name" : " python"
19
+ }
20
+ },
21
+ "cells" : [
22
+ {
23
+ "cell_type" : " markdown" ,
24
+ "metadata" : {
25
+ "id" : " view-in-github" ,
26
+ "colab_type" : " text"
27
+ },
28
+ "source" : [
29
+ " <a href=\" https://colab.research.google.com/github/jazzar-dev/Snippets/blob/main/data_and_ai.ipynb\" target=\" _parent\" ><img src=\" https://colab.research.google.com/assets/colab-badge.svg\" alt=\" Open In Colab\" /></a>"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type" : " markdown" ,
34
+ "source" : [
35
+ " # Data class"
36
+ ],
37
+ "metadata" : {
38
+ "id" : " 5nAH5W-EVRT1"
39
+ }
40
+ },
41
+ {
42
+ "cell_type" : " code" ,
43
+ "source" : [
44
+ " from urllib import request\n " ,
45
+ " import pandas as pd\n " ,
46
+ " import zipfile\n " ,
47
+ " import numpy as np\n " ,
48
+ " from http.client import error\n " ,
49
+ " import tensorflow as tf\n " ,
50
+ " import matplotlib.pyplot as plt\n " ,
51
+ " import os\n " ,
52
class data:
    """Download, unpack and load dataset files, or delete directories.

    Exactly one action is chosen at construction time, in this priority order:

    1. ``is_download`` and ``url`` set -> :meth:`download` (which may then load).
    2. ``text`` set                    -> :meth:`load_txt`.
    3. ``is_csv`` set                  -> :meth:`load_csv`.
    4. ``directory`` non-empty         -> :meth:`remove_dir`.

    Loaded content is collected in ``self.x`` as a flat list of alternating
    entries: the absolute file path, followed by the object read from it.

    Args:
        directory: Directory path(s) to delete recursively. A single string
            is treated as a one-element list.
        is_download: Fetch ``url`` before doing anything else.
        is_zip: The downloaded file is a zip archive that must be extracted
            into ``extract_loc`` (the archive file is deleted afterwards).
        extract_loc: Directory that is extracted into and searched for
            ``file_name`` entries.
        text: Load matching files with ``numpy.loadtxt``.
        url: Location to download from.
        file_save_name: Local path the download is written to.
        file_name: Base name(s) of the files to look for under
            ``extract_loc``. A single string is treated as a one-element list.
        is_csv: Load matching files with ``pandas.read_csv``.
    """

    def __init__(self, directory=None, is_download=False, is_zip=False, extract_loc='./', text=False, url=None, file_save_name='Temporary_dataset', file_name=None, is_csv=False):
        self.is_download = is_download
        self.extract_loc = extract_loc
        self.text = text
        self.x = []
        self.url = url
        self.file_save_name = file_save_name
        # None sentinels replace the former mutable ``[]`` defaults so that
        # instances never share (and accidentally mutate) the same list.
        self.file_name = self._as_list(file_name)
        self.is_csv = is_csv
        self.is_zip = is_zip
        self.file_load_path = []
        self.directory = self._as_list(directory)

        if self.is_download and self.url:
            self.download()
        elif text:
            self.load_txt()
        elif is_csv:
            self.load_csv()
        elif self.directory:
            self.remove_dir()

    @staticmethod
    def _as_list(value):
        """Return ``value`` as a new list (``None`` -> ``[]``, ``str`` -> ``[str]``)."""
        if value is None:
            return []
        if isinstance(value, str):
            return [value]
        return list(value)

    def _discover_files(self):
        """Fill ``file_load_path`` with files under ``extract_loc`` named in ``file_name``.

        Does nothing when ``file_load_path`` already holds entries.
        """
        if self.file_load_path:
            return
        for root, _dirs, files in os.walk(self.extract_loc):
            for name in files:
                if name in self.file_name:
                    self.file_load_path.append(os.path.abspath(os.path.join(root, name)))

    def _read_into_x(self, reader):
        """Append ``path`` and ``reader(path)`` to ``x`` for every known path."""
        for path in self.file_load_path:
            self.x.append(path)
            self.x.append(reader(path))

    def download(self):
        """Fetch ``url`` to ``file_save_name``, optionally unzip it, then load.

        Returns:
            ``0`` if loading raised ``http.client.error``; otherwise ``None``.
        """
        request.urlretrieve(self.url, self.file_save_name)
        if self.is_zip:
            with zipfile.ZipFile(self.file_save_name, 'r') as zip_ref:
                zip_ref.extractall(self.extract_loc)
            # The archive is only removed once the ``with`` block has closed it.
            os.remove(self.file_save_name)
        try:
            if self.text:
                self.load_txt()
            elif self.is_csv:
                self.load_csv()
        except error:
            return 0

    def load_csv(self):
        """Locate the requested files and read each one with ``pandas.read_csv``.

        Returns:
            ``0`` if file discovery raised ``http.client.error``; otherwise
            ``None``. Results are appended to ``self.x``.
        """
        try:
            self._discover_files()
        except error:
            return 0
        self._read_into_x(pd.read_csv)

    def load_txt(self):
        """Locate the requested files and read each one with ``numpy.loadtxt``.

        Returns:
            ``"error in load txt"`` if discovery or loading raised
            ``http.client.error``; otherwise ``None``. Results are appended
            to ``self.x``.
        """
        try:
            self._discover_files()
            self._read_into_x(np.loadtxt)
        except error:
            return "error in load txt"

    def remove_dir(self):
        """Recursively delete every path in ``directory``, then clear the list.

        Deletion is best-effort: paths that are missing or cannot be removed
        are skipped silently.
        """
        # Imported here because the notebook's import cell does not provide it.
        import shutil

        for d in self.directory:
            shutil.rmtree(d, ignore_errors=True)
        self.directory = []
120
+ ],
121
+ "metadata" : {
122
+ "id" : " zziEwi8jlf6R"
123
+ },
124
+ "execution_count" : null ,
125
+ "outputs" : []
126
+ },
127
+ {
128
+ "cell_type" : " markdown" ,
129
+ "source" : [
130
+ " # Download, extract, and load a text file"
131
+ ],
132
+ "metadata" : {
133
+ "id" : " jDpabofXVXv3"
134
+ }
135
+ },
136
+ {
137
+ "cell_type" : " code" ,
138
+ "execution_count" : null ,
139
+ "metadata" : {
140
+ "id" : " DCs24HslMhpF"
141
+ },
142
+ "outputs" : [],
143
+ "source" : [
144
# Fetch the course archive from GitHub and unpack it into `extract_name`.
# Neither `text` nor `is_csv` is passed, so no file is loaded and `x` stays empty.
downloader = data(
    url='https://github.com/jazzar-dev/ai-big-data-course/archive/refs/heads/main.zip',
    is_download=True,
    is_zip=True,
    extract_loc='extract_name',
)
ar = downloader.x
ar
147
+ ]
148
+ }
149
+ ]
150
+ }
0 commit comments