Skip to content

Commit 9739658

Browse files
committed
Add hash table partial implementation template and unit tests
1 parent 017faf1 commit 9739658

File tree

2 files changed

+359
-0
lines changed

2 files changed

+359
-0
lines changed

source/hashtable.py

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
#!python
2+
3+
from linkedlist import LinkedList
4+
5+
6+
class HashTable(object):
    """Hash table that resolves collisions by chaining.

    Every bucket is a LinkedList holding (key, value) tuples. A key is stored
    in the bucket selected by ``hash(key) % len(self.buckets)``. The number of
    entries is tracked in ``self.size`` so the load factor can be computed
    without walking the buckets.
    """

    # set() grows the table once the load factor rises above this value.
    MAX_LOAD_FACTOR = 0.75

    def __init__(self, init_size=8):
        """Initialize this hash table with the given initial number of buckets."""
        self.buckets = [LinkedList() for _ in range(init_size)]
        self.size = 0  # Number of key-value entries

    def __str__(self):
        """Return a formatted string representation of this hash table."""
        items = ['{!r}: {!r}'.format(key, val) for key, val in self.items()]
        return '{' + ', '.join(items) + '}'

    def __repr__(self):
        """Return a string representation of this hash table."""
        return 'HashTable({!r})'.format(self.items())

    def _bucket_index(self, key):
        """Return the bucket index where the given key would be stored."""
        return hash(key) % len(self.buckets)

    def _find_entry(self, bucket, key):
        """Return the (key, value) entry for key in bucket, or None.

        Indexes the entry instead of unpacking it in the lambda's parameter
        list: tuple parameters are a syntax error on Python 3.
        Assumes LinkedList.find returns None when nothing matches, which is
        how every caller in this class interprets its result.
        """
        return bucket.find(lambda entry: entry[0] == key)

    def load_factor(self):
        """Return the load factor, the ratio of number of entries to buckets.

        Uses the tracked entry count, so no bucket is traversed. The explicit
        float() keeps the result fractional under Python 2 integer division.
        Raises ZeroDivisionError if the table has no buckets.
        """
        return float(self.size) / len(self.buckets)

    def keys(self):
        """Return a list of all keys in this hash table.

        Visits every bucket and every entry stored in it."""
        return [key for bucket in self.buckets for key, _ in bucket.items()]

    def values(self):
        """Return a list of all values in this hash table.

        Visits every bucket and every entry stored in it."""
        return [value for bucket in self.buckets
                for _, value in bucket.items()]

    def items(self):
        """Return a list of all entries (key-value pairs) in this hash table.

        Visits every bucket and concatenates the entries each one reports."""
        all_items = []
        for bucket in self.buckets:
            all_items.extend(bucket.items())
        return all_items

    def length(self):
        """Return the number of key-value entries by traversing its buckets.

        Unlike ``self.size`` this recounts from the buckets themselves, which
        makes it an independent check on the tracked count."""
        return sum(bucket.length() for bucket in self.buckets)

    def contains(self, key):
        """Return True if this hash table contains the given key, or False.

        Only the single bucket the key hashes to is searched."""
        bucket = self.buckets[self._bucket_index(key)]
        return self._find_entry(bucket, key) is not None

    def get(self, key):
        """Return the value associated with the given key, or raise KeyError.

        Only the single bucket the key hashes to is searched."""
        bucket = self.buckets[self._bucket_index(key)]
        entry = self._find_entry(bucket, key)
        if entry is None:  # Not found
            raise KeyError('Key not found: {}'.format(key))
        # Entries are always stored as (key, value) tuples by set()
        return entry[1]

    def set(self, key, value):
        """Insert or update the given key with its associated value.

        Updating an existing key replaces its entry and leaves ``self.size``
        unchanged. Inserting a new key increments ``self.size`` and doubles
        the number of buckets if the load factor then exceeds
        MAX_LOAD_FACTOR."""
        bucket = self.buckets[self._bucket_index(key)]
        entry = self._find_entry(bucket, key)
        is_new_key = entry is None
        if not is_new_key:
            # The key's value is being updated: drop the stale entry first
            bucket.delete(entry)
        # Insert the new key-value entry into the bucket in either case
        bucket.append((key, value))
        if is_new_key:
            self.size += 1
            # Only an insertion can raise the load factor
            if self.load_factor() > self.MAX_LOAD_FACTOR:
                self._resize()

    def delete(self, key):
        """Delete the given key and its associated value, or raise KeyError.

        Only the single bucket the key hashes to is searched."""
        bucket = self.buckets[self._bucket_index(key)]
        entry = self._find_entry(bucket, key)
        if entry is None:  # Not found
            raise KeyError('Key not found: {}'.format(key))
        bucket.delete(entry)
        self.size -= 1

    def _resize(self, new_size=None):
        """Resize this hash table's buckets and rehash all key-value entries.

        Called automatically by set() when the load factor exceeds
        MAX_LOAD_FACTOR after a new key is inserted.

        new_size: None doubles the current number of buckets; 0 halves it
        (never going below one bucket); any other value is used as given.
        Raises ValueError if the resulting size is less than 1, because a
        table without buckets cannot store the existing entries.

        Every entry is visited once and a temporary list of all entries is
        held while the new buckets are filled."""
        if new_size is None:
            new_size = len(self.buckets) * 2  # Double size
        elif new_size == 0:
            # Floor division keeps the size an int on Python 3 as well
            new_size = max(len(self.buckets) // 2, 1)  # Half size
        if new_size < 1:
            raise ValueError(
                'new_size must be at least 1, got {}'.format(new_size))
        # Snapshot the entries before the old buckets are discarded
        entries = self.items()
        self.buckets = [LinkedList() for _ in range(new_size)]
        # Keys are already unique, so each entry can be appended directly to
        # the bucket chosen by its index under the new bucket count
        for key, value in entries:
            self.buckets[self._bucket_index(key)].append((key, value))
155+
156+
157+
def test_hash_table():
    """Walk a small HashTable through set, get, contains and delete calls,
    printing the table (and periodically its statistics) after each step."""
    ht = HashTable(4)
    print('HashTable: ' + str(ht))

    def print_stats():
        # Summary printed at each checkpoint of the walkthrough
        print('size: ' + str(ht.size))
        print('length: ' + str(ht.length()))
        print('buckets: ' + str(len(ht.buckets)))
        print('load_factor: ' + str(ht.load_factor()))

    print('Setting entries:')
    # Third field: whether statistics are printed after that insertion.
    # Inserting 'L' is the step that should trigger a resize.
    insertions = (
        ('I', 1, False),
        ('V', 5, True),
        ('X', 10, False),
        ('L', 50, True),
    )
    for key, value, show_stats in insertions:
        ht.set(key, value)
        print('set({}, {}): '.format(key, value) + str(ht))
        if show_stats:
            print_stats()

    print('Getting entries:')
    for key in ('I', 'V', 'X', 'L'):
        print('get({}): '.format(key) + str(ht.get(key)))
    for key in ('X', 'Z'):
        print('contains({}): '.format(key) + str(ht.contains(key)))

    print('Deleting entries:')
    for key in ('I', 'V', 'X', 'L'):
        ht.delete(key)
        print('delete({}): '.format(key) + str(ht))
    print('contains(X): ' + str(ht.contains('X')))
    print_stats()
201+
202+
203+
if __name__ == '__main__':
    # Script entry point: run the printed walkthrough only when this module
    # is executed directly, not when it is imported.
    test_hash_table()

source/hashtable_test.py

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
#!python
2+
3+
from hashtable import HashTable
4+
import unittest
5+
6+
7+
class HashTableTest(unittest.TestCase):
    """Unit tests for the HashTable interface: construction, the keys/values/
    items views, length and size tracking, resizing, lookup, update, delete."""

    def assert_same_items(self, actual, expected):
        """Assert both sequences hold the same items, ignoring their order.

        unittest calls this check assertItemsEqual on Python 2 and
        assertCountEqual on Python 3; use whichever this runtime provides so
        the suite runs unchanged on both.
        """
        check = getattr(self, 'assertCountEqual', None)
        if check is None:
            check = self.assertItemsEqual
        check(actual, expected)

    def test_init(self):
        """A new table has the requested bucket count and no entries."""
        ht = HashTable(4)
        assert len(ht.buckets) == 4
        assert ht.length() == 0
        assert ht.size == 0

    def test_keys(self):
        """keys() reports every inserted key, in any order."""
        ht = HashTable()
        assert ht.keys() == []
        ht.set('I', 1)
        assert ht.keys() == ['I']
        ht.set('V', 5)
        self.assert_same_items(ht.keys(), ['I', 'V'])
        ht.set('X', 10)
        self.assert_same_items(ht.keys(), ['I', 'V', 'X'])

    def test_values(self):
        """values() reports every inserted value, in any order."""
        ht = HashTable()
        assert ht.values() == []
        ht.set('I', 1)
        assert ht.values() == [1]
        ht.set('V', 5)
        self.assert_same_items(ht.values(), [1, 5])
        ht.set('X', 10)
        self.assert_same_items(ht.values(), [1, 5, 10])

    def test_items(self):
        """items() reports every inserted (key, value) pair, in any order."""
        ht = HashTable()
        assert ht.items() == []
        ht.set('I', 1)
        assert ht.items() == [('I', 1)]
        ht.set('V', 5)
        self.assert_same_items(ht.items(), [('I', 1), ('V', 5)])
        ht.set('X', 10)
        self.assert_same_items(ht.items(), [('I', 1), ('V', 5), ('X', 10)])

    def test_length(self):
        """length() grows by one for each new key."""
        ht = HashTable()
        assert ht.length() == 0
        ht.set('I', 1)
        assert ht.length() == 1
        ht.set('V', 5)
        assert ht.length() == 2
        ht.set('X', 10)
        assert ht.length() == 3

    def test_size(self):
        """The size attribute grows by one for each new key."""
        ht = HashTable()
        assert ht.size == 0
        ht.set('I', 1)
        assert ht.size == 1
        ht.set('V', 5)
        assert ht.size == 2
        ht.set('X', 10)
        assert ht.size == 3

    def test_resize(self):
        """The bucket count doubles once the load factor exceeds 0.75."""
        ht = HashTable(2)  # Set init_size to 2
        assert ht.size == 0
        assert len(ht.buckets) == 2
        assert ht.load_factor() == 0
        ht.set('I', 1)
        assert ht.size == 1
        assert len(ht.buckets) == 2
        assert ht.load_factor() == 0.5
        ht.set('V', 5)  # Should trigger resize
        assert ht.size == 2
        assert len(ht.buckets) == 4
        assert ht.load_factor() == 0.5
        ht.set('X', 10)
        assert ht.size == 3
        assert len(ht.buckets) == 4
        assert ht.load_factor() == 0.75
        ht.set('L', 50)  # Should trigger resize
        assert ht.size == 4
        assert len(ht.buckets) == 8
        assert ht.load_factor() == 0.5

    def test_contains(self):
        """contains() is True for stored keys and False for an absent key."""
        ht = HashTable()
        ht.set('I', 1)
        ht.set('V', 5)
        ht.set('X', 10)
        assert ht.contains('I') is True
        assert ht.contains('V') is True
        assert ht.contains('X') is True
        assert ht.contains('A') is False

    def test_set_and_get(self):
        """get() returns stored values and raises KeyError for an absent key."""
        ht = HashTable()
        ht.set('I', 1)
        ht.set('V', 5)
        ht.set('X', 10)
        assert ht.get('I') == 1
        assert ht.get('V') == 5
        assert ht.get('X') == 10
        assert ht.length() == 3
        assert ht.size == 3
        with self.assertRaises(KeyError):
            ht.get('A')  # Key does not exist

    def test_set_twice_and_get(self):
        """Setting an existing key updates its value without adding an entry."""
        ht = HashTable()
        ht.set('I', 1)
        ht.set('V', 4)
        ht.set('X', 9)
        assert ht.length() == 3
        assert ht.size == 3
        ht.set('V', 5)  # Update value
        ht.set('X', 10)  # Update value
        assert ht.get('I') == 1
        assert ht.get('V') == 5
        assert ht.get('X') == 10
        assert ht.length() == 3  # Check length is not overcounting
        assert ht.size == 3  # Check size is not overcounting

    def test_delete(self):
        """delete() removes entries and raises KeyError for missing keys."""
        ht = HashTable()
        ht.set('I', 1)
        ht.set('V', 5)
        ht.set('X', 10)
        assert ht.length() == 3
        assert ht.size == 3
        ht.delete('I')
        ht.delete('X')
        assert ht.length() == 1
        assert ht.size == 1
        with self.assertRaises(KeyError):
            ht.delete('X')  # Key no longer exists
        with self.assertRaises(KeyError):
            ht.delete('A')  # Key does not exist
152+
153+
154+
if __name__ == '__main__':
    # Script entry point: hand control to the unittest runner when this
    # module is executed directly.
    unittest.main()

0 commit comments

Comments
 (0)