Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions mdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ def create_query_plan(query, keywords, action):
args = dic['create table'][dic['create table'].index('('):dic['create table'].index(')')+1]
dic['create table'] = dic['create table'].removesuffix(args).strip()
arg_nopk = args.replace('primary key', '')[1:-1]
arg_nopk = arg_nopk.replace('unique', '')[1:-1]
arglist = [val.strip().split(' ') for val in arg_nopk.split(',')]
dic['column_names'] = ','.join([val[0] for val in arglist])
dic['column_types'] = ','.join([val[1] for val in arglist])
Expand All @@ -105,6 +106,13 @@ def create_query_plan(query, keywords, action):
dic['primary key'] = arglist[arglist.index('primary')-2]
else:
dic['primary key'] = None

# CREATING ENVIRONMENT FOR UNIQUE (INCLUDE `UNIQUE` IN DIC)
if 'unique' in args:
arglist = args[1:-1].split(' ')
dic['unique'] = arglist[arglist.index('unique,') - 2]
else:
dic['unique'] = None

if action=='import':
dic = {'import table' if key=='import' else key: val for key, val in dic.items()}
Expand Down
69 changes: 53 additions & 16 deletions miniDB/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,21 +101,22 @@ def _update(self):
self._update_meta_insert_stack()


def create_table(self, name, column_names, column_types, primary_key=None, load=None):
def create_table(self, name, column_names, column_types, primary_key=None, unique=None, load=None):
'''
This method creates a new table. This table is saved and can be accessed via db_object.tables['table_name'] or db_object.table_name

Args:
name: string. Name of table.
column_names: list. Names of columns.
column_types: list. Types of columns.
unique: string. Unique column(if it exists).
primary_key: string. The primary key (if it exists).
load: boolean. Defines table object parameters as the name of the table and the column names.
'''
# print('here -> ', column_names.split(','))
self.tables.update({name: Table(name=name, column_names=column_names.split(','), column_types=column_types.split(','), primary_key=primary_key, load=load)})
self.tables.update({name: Table(name=name, column_names=column_names.split(','), column_types=column_types.split(','), primary_key=primary_key, unique=unique, load=load)})
# self._name = Table(name=name, column_names=column_names, column_types=column_types, load=load)
# check that new dynamic var doesnt exist already
# check that new dynamic var doesn't exist already
# self.no_of_tables += 1
self._update()
self.save_database()
Expand Down Expand Up @@ -352,20 +353,22 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \
distinct: boolean. If True, the resulting table will contain only unique rows.
'''

# print(table_name)
self.load_database()
if isinstance(table_name,Table):
return table_name._select_where(columns, condition, distinct, order_by, desc, limit)

if condition is not None:
condition_column = split_condition(condition)[0]
if split_condition(condition) is not None:
condition_column = split_condition(condition)[0]
else:
condition_column = ''


# self.lock_table(table_name, mode='x')
if self.is_locked(table_name):
return


if self._has_index(table_name) and condition_column==self.tables[table_name].column_names[self.tables[table_name].pk_idx]:
index_name = self.select('*', 'meta_indexes', f'table_name={table_name}', return_object=True).column_by_name('index_name')[0]
bt = self._load_idx(index_name)
Expand Down Expand Up @@ -659,14 +662,15 @@ def create_index(self, index_name, table_name, index_type='btree'):
table_name: string. Table name (must be part of database).
index_name: string. Name of the created index.
'''
if self.tables[table_name].pk_idx is None: # if no primary key, no index
raise Exception('Cannot create index. Table has no primary key.')

# if self.tables[table_name].pk_idx is None: # if no primary key, no index
# raise Exception('Cannot create index. Table has no primary key.')
if index_name not in self.tables['meta_indexes'].column_by_name('index_name'):
# currently only btree is supported. This can be changed by adding another if.
if index_type=='btree':
logging.info('Creating Btree index.')
# logging.info('Creating Btree index.')
# insert a record with the name of the index and the table on which it's created to the meta_indexes table
self.tables['meta_indexes']._insert([table_name, index_name])
# self.tables['meta_indexes']._insert([table_name, index_name])
# create the actual index
self._construct_index(table_name, index_name)
self.save_database()
Expand All @@ -680,16 +684,49 @@ def _construct_index(self, table_name, index_name):
Args:
table_name: string. Table name (must be part of database).
index_name: string. Name of the created index.

Check if a column was specified`table_name(column_name)`

Actions:
If column is not specified, raise exception
We want both table_name and column_name, to check for PK or Unique column
if one of two exists then btree index is valid
'''
bt = Btree(3) # 3 is arbitrary

# for each record in the primary key of the table, insert its value and index to the btree
for idx, key in enumerate(self.tables[table_name].column_by_name(self.tables[table_name].pk)):
if key is None:
continue
bt.insert(key, idx)
# save the btree
self._save_index(index_name, bt)
if "(" in table_name:
brackett = table_name.split("(")
table_name = brackett[0].strip()
column_name = brackett[1].replace(")", "").strip()
table_to_check_all = None

if self.tables[table_name].pk is not None:
# check if column with pk is equal to column specified
if self.tables[table_name].pk == column_name:
table_to_check_all = self.tables[table_name].column_by_name(self.tables[table_name].pk)

if self.tables[table_name].unique is not None:
# check if the unique column is equal to the column specified
if self.tables[table_name].unique == column_name:
table_to_check_all = self.tables[table_name].column_by_name(self.tables[table_name].unique)


if table_to_check_all is not None:
self.tables['meta_indexes']._insert([table_name, index_name])

# for each record in the primary key of the table, insert its value and index to the btree
for idx, key in enumerate(table_to_check_all):
if key is None:
continue
bt.insert(key, idx)

# save the btree
print("Btree index", index_name, "created successfully!")
self._save_index(index_name, bt)
else:
raise Exception('Cannot create index. Column is not PK or UNIQUE!')
else:
raise Exception('Cannot create index. Column Name was not specified!')


def _has_index(self, table_name):
Expand Down
2 changes: 1 addition & 1 deletion miniDB/joins.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def join(self):
join_table_name = ''
join_table_colnames = left_names + right_names
join_table_coltypes = self.left_table.column_types + self.right_table.column_types
join_table = Table(name=join_table_name, column_names=join_table_colnames, column_types= join_table_coltypes)
join_table = Table(name=join_table_name, column_names=join_table_colnames, column_types=join_table_coltypes)

# Save merged file first. The hypothesis is that the RAM cannot fit the file, thus we have it saved
# However we load the file to display it like this, might need to be changed in the future
Expand Down
21 changes: 19 additions & 2 deletions miniDB/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,30 @@ def split_condition(condition):
if len(splt)>1:
left, right = splt[0].strip(), splt[1].strip()

'''
Here, we check whether the operator appears more than once in the condition. If it does, we split the
condition at the first whitespace-separated occurrence of the operator; otherwise the plain split above
breaks. This way the same operator can be used multiple times in a condition.
'''
if condition.count(op_key) > 1:
new_cond = condition.split()
new_cond_index = new_cond.index(op_key)

new_cond1 = new_cond[:new_cond_index]
new_cond2 = new_cond[new_cond_index + 1:]
new_cond1 = ' '.join(new_cond1)
new_cond2 = ' '.join(new_cond2)

left = new_cond1
right = new_cond2

if right[0] == '"' == right[-1]: # If the value has leading and trailing quotes, remove them.
right = right.strip('"')
elif ' ' in right: # If it has whitespaces but no leading and trailing double quotes, throw.
raise ValueError(f'Invalid condition: {condition}\nValue must be enclosed in double quotation marks to include whitespaces.')

if right.find('"') != -1: # If there are any double quotes in the value, throw. (Notice we've already removed the leading and trailing ones)
raise ValueError(f'Invalid condition: {condition}\nDouble quotation marks are not allowed inside values.')
# if right.find('"') != -1: # If there are any double quotes in the value, throw. (Notice we've already removed the leading and trailing ones)
# raise ValueError(f'Invalid condition: {condition}\nDouble quotation marks are not allowed inside values.')

return left, op_key, right

Expand Down
109 changes: 104 additions & 5 deletions miniDB/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class Table:
- a dictionary that includes the appropriate info (all the attributes in __init__)

'''
def __init__(self, name=None, column_names=None, column_types=None, primary_key=None, load=None):
def __init__(self, name=None, column_names=None, column_types=None, primary_key=None, unique=None, load=None):

if load is not None:
# if load is a dict, replace the object dict with it (replaces the object with the specified one)
Expand Down Expand Up @@ -67,7 +67,14 @@ def __init__(self, name=None, column_names=None, column_types=None, primary_key=
else:
self.pk_idx = None

if unique is not None:
print("unique_idx", unique, self.column_names.index(unique))
self.unique_idx = self.column_names.index(unique)
else:
self.unique_idx = None

self.pk = primary_key
self.unique = unique
# self._update()

# if any of the name, columns_names and column types are none. return an empty table object
Expand Down Expand Up @@ -233,9 +240,16 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by
# if condition is None, return all rows
# if not, return the rows with values where condition is met for value
if condition is not None:
column_name, operator, value = self._parse_condition(condition)
column = self.column_by_name(column_name)
rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)]
# check the condition status (not,between,or,and)
condition = self.Check_condition(condition, return_cols)

# check the type of the returned value of the condition. If it's a list , the rows are ready.
if isinstance(condition, list):
rows = condition
else:
column_name, operator, value = self._parse_condition(condition)
column = self.column_by_name(column_name)
rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)]
else:
rows = [i for i in range(len(self.data))]

Expand Down Expand Up @@ -269,6 +283,86 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by

return s_table

# Check Condition, in general
def Check_condition(self, condition, cols):
    '''
    Route a condition to the helper that understands its keyword.

    The keywords are tested in a fixed order (`not`, `between`, `and`, `or`), so a
    `between` condition is handled before the `and` it contains is seen.

    Args:
        condition: string. The raw condition of the query.
        cols: forwarded untouched to the selected helper.

    Returns:
        Whatever the selected helper returns (a rewritten condition string for `not`,
        a list of row indexes for `between`/`and`/`or`), or the condition itself
        when it contains none of the keywords.
    '''
    # a leading `not` wins over every other keyword
    if condition.startswith("not "):
        return self.Check_NotCondition(condition, cols)

    # `between` must be tested before `and`, because it contains one
    if " between " in condition:
        return self.Check_BetweenCondition(condition, cols)

    if " and " in condition:
        return self.Check_AndCondition(condition, cols)

    if " or " in condition:
        return self.Check_OrCondition(condition, cols)

    # plain condition, nothing to do
    return condition

# Target is to find the operator and reverse it for the `not` condition
def Check_NotCondition(self, condition, cols):
    '''
    Rewrite a `not <column> <operator> <value>` condition as the equivalent condition without `not`.

    Args:
        condition: string. A condition that starts with the `not` keyword.
        cols: not used here; accepted so that all the Check_* helpers share one signature.

    Returns:
        string. The condition with the leading `not` removed and its comparison operator inverted.
    '''
    new_condition = condition.strip()

    # Remove only the leading keyword. Replacing every "not" in the string would also
    # eat the letters out of column names or values such as `notes` or `nothing`.
    if new_condition.startswith("not "):
        new_condition = new_condition[len("not "):].strip()

    # The two-character operators are tested first, otherwise `<=` would be matched as `<`.
    inverted_operators = (("<=", ">"), (">=", "<"), ("<", ">="), (">", "<="))
    for op, inverse in inverted_operators:
        if op in new_condition:
            # only the first occurrence is the operator; later ones belong to the value
            return new_condition.replace(op, inverse, 1)

    # NOTE(review): a condition without an ordering operator (e.g. `not a = 5`) is returned
    # without being negated, exactly as before - confirm whether an inequality operator is
    # supported by get_op before adding that case.
    return new_condition

# Target is to find column name and the values between the `and` and then make a new custom condition.
# The new condition follows the `and` condition.
def Check_BetweenCondition(self, condition, cols):
    '''
    Evaluate a `<column> between <low> and <high>` condition.

    The condition is rewritten as `<column> >= <low> and <column><=<high>` and handed
    to Check_AndCondition.

    Args:
        condition: string. A condition that contains the ` between ` keyword.
        cols: forwarded untouched to Check_AndCondition.

    Returns:
        Whatever Check_AndCondition returns for the rewritten condition.

    Raises:
        ValueError: if the condition does not have the form `<column> between <low> and <high>`.
    '''
    # Split on the keywords surrounded by spaces instead of deleting the substrings
    # "between"/"and" everywhere, which would damage names such as `brand` or `island`.
    column_name, _, bounds = condition.partition(" between ")
    low, separator, high = bounds.partition(" and ")
    column_name, low, high = column_name.strip(), low.strip(), high.strip()

    if not (separator and column_name and low and high):
        raise ValueError(f'Invalid condition: {condition}\nExpected: column between low and high.')

    new_condition = column_name + " >= " + low + " and " + column_name + "<=" + high
    return self.Check_AndCondition(new_condition, cols)

# In this condition (`and`), we need to get the common values from the lists created in this. Values between `and`
# could be 2+, so we need to loop through all the possible values.
# in the end a list with all values remaining.
def Check_AndCondition(self, condition, cols):
    '''
    Evaluate a condition made of two or more parts joined with ` and `.

    Args:
        condition: string. The parts of the condition, separated by ` and `.
        cols: forwarded untouched to GetResults_Condition.

    Returns:
        list. The row indexes that satisfy every part, in ascending order.
    '''
    common_rows = None
    for part in condition.split(" and "):
        part_rows = set(self.GetResults_Condition(part, cols))
        # the first part seeds the result, every following part narrows it down
        common_rows = part_rows if common_rows is None else common_rows & part_rows

    # A set has no defined order. Sort the indexes so the selected rows keep the order
    # they have in the table instead of coming back shuffled.
    return sorted(common_rows if common_rows is not None else ())

# In this condition (`or`), we need to get the all the values from the lists created in this. Values between `or`
# could be 2+, so we need to loop through all the possible values.
# in the end a list with all values remaining.
def Check_OrCondition(self, condition, cols):
    '''
    Evaluate a condition made of two or more parts joined with ` or `.

    Args:
        condition: string. The parts of the condition, separated by ` or `.
        cols: forwarded untouched to GetResults_Condition.

    Returns:
        list. The row indexes that satisfy at least one part, without duplicates,
        in ascending order.
    '''
    matching_rows = set()
    for part in condition.split(" or "):
        matching_rows.update(self.GetResults_Condition(part, cols))

    # A set has no defined order. Sort the indexes so the selected rows keep the order
    # they have in the table instead of coming back shuffled.
    return sorted(matching_rows)

# Get a condition as parameter and return the rows
def GetResults_Condition(self, condition, cols):
    '''
    Find the rows of the table that satisfy a single condition.

    Args:
        condition: string. One condition, parsed with _parse_condition.
        cols: not used here; accepted so that all the condition helpers share one signature.

    Returns:
        list. The indexes of the rows whose value in the condition's column satisfies it.
    '''
    column_name, operator, value = self._parse_condition(condition)

    matching_rows = []
    # walk the column once and keep the position of every value that passes the check
    for row_index, cell in enumerate(self.column_by_name(column_name)):
        if get_op(operator, cell, value):
            matching_rows.append(row_index)

    return matching_rows


def _select_where_with_btree(self, return_columns, bt, condition, distinct=False, order_by=None, desc=True, limit=None):

Expand Down Expand Up @@ -378,7 +472,7 @@ class CustomFailException(Exception):
join_table_name = ''
join_table_colnames = left_names+right_names
join_table_coltypes = self.column_types+table_right.column_types
join_table = Table(name=join_table_name, column_names=join_table_colnames, column_types= join_table_coltypes)
join_table = Table(name=join_table_name, column_names=join_table_colnames, column_types=join_table_coltypes)

return join_table, column_index_left, column_index_right, operator

Expand Down Expand Up @@ -533,6 +627,10 @@ def show(self, no_of_rows=None, is_locked=False):
if self.pk_idx is not None:
# table has a primary key, add PK next to the appropriate column
headers[self.pk_idx] = headers[self.pk_idx]+' #PK#'
if self.unique_idx is not None:
# table has unique column, add UNIQUE next to the appropriate column
headers[self.unique_idx] = headers[self.unique_idx] + ' #UNIQUE#'

# detect the rows that are not full of Nones (rows full of Nones have been deleted)
# if we dont skip these rows, the returning table has empty rows at the deleted positions
non_none_rows = [row for row in self.data if any(row)]
Expand All @@ -558,6 +656,7 @@ def _parse_condition(self, condition, join=False):

# cast the value with the specified column's type and return the column name, the operator and the casted value
left, op, right = split_condition(condition)

if left not in self.column_names:
raise ValueError(f'Condition is not valid (cant find column name)')
coltype = self.column_types[self.column_names.index(left)]
Expand Down