DataStories-UniPi · EktorasDEV10 · Feb 18, 2023 · Feb 20, 2023 · Feb 20, 2023
diff --git a/mdb.py b/mdb.py
@@ -97,6 +97,7 @@ def create_query_plan(query, keywords, action):
         args = dic['create table'][dic['create table'].index('('):dic['create table'].index(')')+1]
         dic['create table'] = dic['create table'].removesuffix(args).strip()
         arg_nopk = args.replace('primary key', '')[1:-1]
+        arg_nopk = arg_nopk.replace('unique', '')  # parentheses are already stripped above; slicing again cut real characters
         arglist = [val.strip().split(' ') for val in arg_nopk.split(',')]
         dic['column_names'] = ','.join([val[0] for val in arglist])
         dic['column_types'] = ','.join([val[1] for val in arglist])
@@ -105,6 +106,13 @@ def create_query_plan(query, keywords, action):
             dic['primary key'] = arglist[arglist.index('primary')-2]
         else:
             dic['primary key'] = None
+
+        # UNIQUE column: in `<name> <type> unique` the column name sits two tokens before the keyword
+        if 'unique' in args:
+            arglist = args[1:-1].replace(',', ' , ').split()  # isolate commas so `unique` matches in any position
+            dic['unique'] = arglist[arglist.index('unique') - 2]
+        else:
+            dic['unique'] = None
 
     if action=='import': 
         dic = {'import table' if key=='import' else key: val for key, val in dic.items()}

diff --git a/miniDB/database.py b/miniDB/database.py
@@ -101,21 +101,22 @@ def _update(self):
         self._update_meta_insert_stack()
 
 
-    def create_table(self, name, column_names, column_types, primary_key=None, load=None):
+    def create_table(self, name, column_names, column_types, primary_key=None, unique=None, load=None):
         '''
         This method create a new table. This table is saved and can be accessed via db_object.tables['table_name'] or db_object.table_name
 
         Args:
             name: string. Name of table.
             column_names: list. Names of columns.
             column_types: list. Types of columns.
             primary_key: string. The primary key (if it exists).
+            unique: string. Name of the column declared as unique (if it exists).
             load: boolean. Defines table object parameters as the name of the table and the column names.
         '''
         # print('here -> ', column_names.split(','))
-        self.tables.update({name: Table(name=name, column_names=column_names.split(','), column_types=column_types.split(','), primary_key=primary_key, load=load)})
+        self.tables.update({name: Table(name=name, column_names=column_names.split(','), column_types=column_types.split(','), primary_key=primary_key, unique=unique, load=load)})
         # self._name = Table(name=name, column_names=column_names, column_types=column_types, load=load)
-        # check that new dynamic var doesnt exist already
+        # check that new dynamic var doesn't exist already
         # self.no_of_tables += 1
         self._update()
         self.save_database()
@@ -352,20 +353,22 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \
             distinct: boolean. If True, the resulting table will contain only unique rows.
         '''
 
-        # print(table_name)
         self.load_database()
         if isinstance(table_name,Table):
             return table_name._select_where(columns, condition, distinct, order_by, desc, limit)
 
         if condition is not None:
-            condition_column = split_condition(condition)[0]
+            # split_condition can return None (presumably when no plain operator is found); fall back to ''
+            parsed_condition = split_condition(condition)
+            condition_column = parsed_condition[0] if parsed_condition is not None else ''
         else:
             condition_column = ''
 
 
         # self.lock_table(table_name, mode='x')
         if self.is_locked(table_name):
             return
+
         if self._has_index(table_name) and condition_column==self.tables[table_name].column_names[self.tables[table_name].pk_idx]:
             index_name = self.select('*', 'meta_indexes', f'table_name={table_name}', return_object=True).column_by_name('index_name')[0]
             bt = self._load_idx(index_name)
@@ -659,14 +662,15 @@ def create_index(self, index_name, table_name, index_type='btree'):
             table_name: string. Table name (must be part of database).
             index_name: string. Name of the created index.
         '''
-        if self.tables[table_name].pk_idx is None: # if no primary key, no index
-            raise Exception('Cannot create index. Table has no primary key.')
+
+        # NOTE: the indexed column is validated in `_construct_index`, which raises when it is
+        # neither the primary key nor the unique column of the table.
         if index_name not in self.tables['meta_indexes'].column_by_name('index_name'):
             # currently only btree is supported. This can be changed by adding another if.
             if index_type=='btree':
-                logging.info('Creating Btree index.')
+                # the creation message is printed by `_construct_index`
                 # insert a record with the name of the index and the table on which it's created to the meta_indexes table
-                self.tables['meta_indexes']._insert([table_name, index_name])
+                # (the meta_indexes record is inserted by `_construct_index`, after the column is validated)
                 # crate the actual index
                 self._construct_index(table_name, index_name)
                 self.save_database()
@@ -680,16 +684,49 @@ def _construct_index(self, table_name, index_name):
         Args:
             table_name: string. Table name (must be part of database).
             index_name: string. Name of the created index.
+
+        `table_name` must be given as `table_name(column_name)`: the column to index is mandatory.
+
+        Raises:
+            Exception: if no column is specified, or if the specified column is neither the
+                primary key nor the unique column of the table (a btree index is only built
+                on one of those two).
         '''
         bt = Btree(3) # 3 is arbitrary
 
-        # for each record in the primary key of the table, insert its value and index to the btree
-        for idx, key in enumerate(self.tables[table_name].column_by_name(self.tables[table_name].pk)):
-            if key is None:
-                continue
-            bt.insert(key, idx)
-        # save the btree
-        self._save_index(index_name, bt)
+        # the column to index has to be named explicitly
+        if "(" not in table_name:
+            raise Exception('Cannot create index. Column Name was not specified!')
+
+        # split `table_name(column_name)` into the table and the column to index
+        brackett = table_name.split("(")
+        table_name = brackett[0].strip()
+        column_name = brackett[1].replace(")", "").strip()
+        table = self.tables[table_name]
+
+        # only the primary key or the unique column may be indexed
+        if column_name not in (table.pk, table.unique):
+            raise Exception('Cannot create index. Column is not PK or UNIQUE!')
+
+        # values of the indexed column
+        column_values = table.column_by_name(column_name)
+
+        # insert a record with the name of the index and the table on which it's created
+        # to the meta_indexes table
+        self.tables['meta_indexes']._insert([table_name, index_name])
+
+        # for each record of the indexed column, insert its value and index to the btree
+        for idx, key in enumerate(column_values):
+            if key is None:
+                # no value to index in this row
+                continue
+            # this call used to be indented under the `continue` above, which made it
+            # unreachable: no key was ever inserted and every index was saved empty
+            bt.insert(key, idx)
+
+        # save the btree
+        print("Btree index", index_name, "created successfully!")
+        self._save_index(index_name, bt)
 
 
     def _has_index(self, table_name):

diff --git a/miniDB/joins.py b/miniDB/joins.py
@@ -156,7 +156,7 @@ def join(self):
         join_table_name = ''
         join_table_colnames = left_names + right_names
         join_table_coltypes = self.left_table.column_types + self.right_table.column_types
-        join_table = Table(name=join_table_name, column_names=join_table_colnames, column_types= join_table_coltypes)
+        join_table = Table(name=join_table_name, column_names=join_table_colnames, column_types=join_table_coltypes)
 
         # Save merged file first. The hypothesis is that the RAM cannot fit the file, thus we have it saved
         # However we load the file to display it like this, might need to be changed in the future

diff --git a/miniDB/misc.py b/miniDB/misc.py
@@ -27,13 +27,30 @@ def split_condition(condition):
         if len(splt)>1:
             left, right = splt[0].strip(), splt[1].strip()
 
+            # The operator may show up again on the right-hand side, typically inside a
+            # quoted value (e.g. name = "a = b"). Everything after the FIRST occurrence
+            # of the operator then belongs to the value, so left/right are rebuilt here
+            # instead of using the first two pieces of the split above.
+            if condition.count(op_key) > 1:
+                tokens = condition.split()
+                if op_key in tokens:
+                    # operator is a standalone token: cut the token list around it
+                    cut = tokens.index(op_key)
+                    left = ' '.join(tokens[:cut])
+                    right = ' '.join(tokens[cut + 1:])
+                else:
+                    # operator is glued to its operands (e.g. name="a=b"): `tokens.index`
+                    # would raise an unrelated ValueError, so cut the raw string instead
+                    head, _, tail = condition.partition(op_key)
+                    left, right = head.strip(), tail.strip()
+
             if right[0] == '"' == right[-1]: # If the value has leading and trailing quotes, remove them.
                 right = right.strip('"')
             elif ' ' in right: # If it has whitespaces but no leading and trailing double quotes, throw.
                 raise ValueError(f'Invalid condition: {condition}\nValue must be enclosed in double quotation marks to include whitespaces.')
 
-            if right.find('"') != -1: # If there are any double quotes in the value, throw. (Notice we've already removed the leading and trailing ones)
-                raise ValueError(f'Invalid condition: {condition}\nDouble quotation marks are not allowed inside values.')
+            # if right.find('"') != -1: # If there are any double quotes in the value, throw. (Notice we've already removed the leading and trailing ones)
+            #     raise ValueError(f'Invalid condition: {condition}\nDouble quotation marks are not allowed inside values.')
 
             return left, op_key, right
 

diff --git a/miniDB/table.py b/miniDB/table.py
@@ -26,7 +26,7 @@ class Table:
             - a dictionary that includes the appropriate info (all the attributes in __init__)
 
     '''
-    def __init__(self, name=None, column_names=None, column_types=None, primary_key=None, load=None):
+    def __init__(self, name=None, column_names=None, column_types=None, primary_key=None, unique=None, load=None):
 
         if load is not None:
             # if load is a dict, replace the object dict with it (replaces the object with the specified one)
@@ -67,7 +67,14 @@ def __init__(self, name=None, column_names=None, column_types=None, primary_key=
             else:
                 self.pk_idx = None
 
+            # index of the unique column inside column_names (None when no unique column is given)
+            if unique is not None:
+                self.unique_idx = self.column_names.index(unique)
+            else:
+                self.unique_idx = None
+
             self.pk = primary_key
+            self.unique = unique
             # self._update()
 
     # if any of the name, columns_names and column types are none. return an empty table object
@@ -233,9 +240,16 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by
         # if condition is None, return all rows
         # if not, return the rows with values where condition is met for value
         if condition is not None:
-            column_name, operator, value = self._parse_condition(condition)
-            column = self.column_by_name(column_name)
-            rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)]
+            # resolve compound conditions (not, between, and, or) before parsing
+            condition = self.Check_condition(condition, return_cols)
+            # Check_condition hands back either a list of row indexes that already satisfy the
+            # condition, or a condition string that still has to be parsed and evaluated here.
+            if isinstance(condition, list):
+                rows = condition
+            else:
+                column_name, operator, value = self._parse_condition(condition)
+                column = self.column_by_name(column_name)
+                rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)]
         else:
             rows = [i for i in range(len(self.data))]
 
@@ -269,6 +283,86 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by
 
         return s_table
 
+    # Route a condition to the handler of the keyword it contains (not, between, and, or).
+    def Check_condition(self, condition, cols):
+        # `between` is tested before `and` because a between clause contains its own `and`
+        if condition.startswith("not "):
+            return self.Check_NotCondition(condition, cols)
+        if " between " in condition:
+            return self.Check_BetweenCondition(condition, cols)
+        if " and " in condition:
+            return self.Check_AndCondition(condition, cols)
+        if " or " in condition:
+            return self.Check_OrCondition(condition, cols)
+
+        return condition  # no keyword found: handed back unchanged
+
+    # Rewrite `not <column> <op> <value>` as the equivalent condition with the inverted operator.
+    def Check_NotCondition(self, condition, cols):
+        # drop only the leading keyword: replace("not", "") also mangled names such as `notes`
+        new_condition = condition.strip().removeprefix("not ").strip()
+        inverse = {"<=": ">", ">=": "<", "<": ">=", ">": "<="}
+
+        # two-character operators are tested first, so `<=` is never taken for `<`
+        for op in ("<=", ">=", "<", ">"):
+            if op in new_condition:
+                # invert the first occurrence only
+                return new_condition.replace(op, inverse[op], 1)
+
+        # no invertible operator: returning the condition as-is would silently ignore `not`
+        raise ValueError(f'Invalid condition: {condition}\nNOT supports only <, <=, >, >=.')
+
+    # Rewrite `<column> between <low> and <high>` as `<column> >= <low> and <column> <= <high>`
+    # and evaluate it with the `and` handler.
+    def Check_BetweenCondition(self, condition, cols):
+        # cut on the keywords with their surrounding spaces: replace("and", "") also hit `brand`
+        column_name, _, bounds = condition.partition(" between ")
+        low, _, high = bounds.partition(" and ")
+        column_name, low, high = column_name.strip(), low.strip(), high.strip()
+
+        new_condition = f"{column_name} >= {low} and {column_name} <= {high}"
+        return self.Check_AndCondition(new_condition, cols)
+
+    # Evaluate `<cond> and <cond> [and ...]`: every sub-condition is resolved to the row indexes
+    # it matches and only the indexes common to all of them are kept.
+    # Returns the surviving row indexes in ascending order.
+    def Check_AndCondition(self, condition, cols):
+        sub_conditions = condition.split(" and ")
+
+        common_rows = None
+        for sub_condition in sub_conditions:
+            matched = set(self.GetResults_Condition(sub_condition, cols))
+            if common_rows is None:
+                # first sub-condition: nothing to intersect with yet
+                common_rows = matched
+            else:
+                common_rows &= matched
+
+        # a set has no defined order, so sort instead of returning list(set(...))
+        return sorted(common_rows)
+
+    # Evaluate `<cond> or <cond> [or ...]`: every sub-condition is resolved to the row indexes it
+    # matches and an index is kept when at least one sub-condition matched it.
+    # Returns the matching row indexes, without duplicates, in ascending order.
+    def Check_OrCondition(self, condition, cols):
+        sub_conditions = condition.split(" or ")
+
+        matched_rows = set()
+        for sub_condition in sub_conditions:
+            # a set keeps each index once even when several sub-conditions match the row
+            matched_rows.update(self.GetResults_Condition(sub_condition, cols))
+
+        # a set has no defined order, so sort instead of returning list(set(...))
+        return sorted(matched_rows)
+
+    # Resolve one `<column> <operator> <value>` condition to the indexes of the rows satisfying it.
+    def GetResults_Condition(self, condition, cols):
+        # `cols` is accepted like in the other condition handlers but is not used here
+        column_name, operator, value = self._parse_condition(condition)
+        values = self.column_by_name(column_name)
+
+        return [row_idx for row_idx, cell in enumerate(values) if get_op(operator, cell, value)]
+
 
     def _select_where_with_btree(self, return_columns, bt, condition, distinct=False, order_by=None, desc=True, limit=None):
 
@@ -378,7 +472,7 @@ class CustomFailException(Exception):
         join_table_name = ''
         join_table_colnames = left_names+right_names
         join_table_coltypes = self.column_types+table_right.column_types
-        join_table = Table(name=join_table_name, column_names=join_table_colnames, column_types= join_table_coltypes)
+        join_table = Table(name=join_table_name, column_names=join_table_colnames, column_types=join_table_coltypes)
 
         return join_table, column_index_left, column_index_right, operator
 
@@ -533,6 +627,10 @@ def show(self, no_of_rows=None, is_locked=False):
         if self.pk_idx is not None:
             # table has a primary key, add PK next to the appropriate column
             headers[self.pk_idx] = headers[self.pk_idx]+' #PK#'
+        if self.unique_idx is not None:
+            # table has unique column, add UNIQUE next to the appropriate column
+            headers[self.unique_idx] = headers[self.unique_idx] + ' #UNIQUE#'
+
         # detect the rows that are no tfull of nones (these rows have been deleted)
         # if we dont skip these rows, the returning table has empty rows at the deleted positions
         non_none_rows = [row for row in self.data if any(row)]
@@ -558,6 +656,7 @@ def _parse_condition(self, condition, join=False):
 
         # cast the value with the specified column's type and return the column name, the operator and the casted value
         left, op, right = split_condition(condition)
+
         if left not in self.column_names:
             raise ValueError(f'Condition is not valid (cant find column name)')
         coltype = self.column_types[self.column_names.index(left)]