Skip to content

Commit 1f5ea61

Browse files
committed
Replace trie with general string-based-key hashmap
Previously, the trie implementation was inconsistent, mainly because it used an index into FUNCS to refer to the corresponding func_t. Additionally, a trie's main advantage is that it enables prefix lookup, but shecc has never used it that way. Furthermore, each trie node takes 512 bytes, whereas in this implementation each hashmap bucket node takes 24 + W bytes (W stands for the key length including the terminating NUL character), which significantly reduces memory usage. This also enables us to refactor more structures in shecc to use the hashmap implementation later.
1 parent 9208d7d commit 1f5ea61

File tree

2 files changed

+166
-85
lines changed

2 files changed

+166
-85
lines changed

src/defs.h

+11-4
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
#define MAX_LOCALS 1500
2121
#define MAX_FIELDS 64
2222
#define MAX_FUNCS 512
23-
#define MAX_FUNC_TRIES 2160
2423
#define MAX_TYPES 64
2524
#define MAX_IR_INSTR 50000
2625
#define MAX_BB_PRED 128
@@ -305,10 +304,18 @@ typedef struct {
305304
int value;
306305
} constant_t;
307306

307+
/* string-based hash map definitions */
308+
309+
/* One key-value entry in a hashmap bucket; entries that hash to the same
 * bucket are linked together through 'next'.
 */
typedef struct node {
310+
char *key; /* heap-allocated copy of the key string (see new_node()) */
311+
void *val; /* value pointer supplied by the caller of put_hashmap() */
312+
struct node *next; /* next entry in the same bucket, or NULL at the end */
313+
} node_t;
314+
308315
typedef struct {
309-
int index;
310-
int next[128];
311-
} trie_t;
316+
int size; /* number of entries in 'buckets' (set by create_hashmap()) */
317+
node_t **buckets; /* array of 'size' chain heads; NULL marks an empty bucket */
318+
} hashmap_t;
312319

313320
struct phi_operand {
314321
var_t *var;

src/globals.c

+155-81
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,7 @@ block_list_t BLOCKS;
1212
macro_t *MACROS;
1313
int macros_idx = 0;
1414

15-
/* the first element is reserved for global scope */
16-
func_t *FUNCS;
17-
int funcs_idx = 1;
18-
19-
/* FUNC_TRIES is used to improve the performance of the find_func function.
20-
* Instead of searching through all functions and comparing their names, we can
21-
* utilize the trie data structure to search for existing functions efficiently.
22-
* The index starts from 1 because the first trie node represents an empty input
23-
* string, and it is not possible to record a function with an empty name.
24-
*/
25-
trie_t *FUNC_TRIES;
26-
int func_tries_idx = 1;
15+
hashmap_t *FUNCS_MAP;
2716

2817
type_t *TYPES;
2918
int types_idx = 0;
@@ -72,72 +61,159 @@ char *elf_strtab;
7261
char *elf_section;
7362

7463
/**
75-
* insert_trie() - Inserts a new element into the trie structure.
76-
* @trie: A pointer to the trie where the name will be inserted.
77-
* @name: The name to be inserted into the trie.
78-
* @funcs_index: The index of the pointer to the func_t. The index is recorded
79-
* in a 1-indexed format. Because the first element of 'FUNCS' has been
80-
* reserved, there is no need to shift it.
81-
* Return: The index of the pointer to the func_t.
64+
* hash_fnv1a() - hashes a string with FNV-1a hash function.
65+
* The result may be negative.
66+
* @key: The string to be hashed.
8267
*
83-
* If the function has been inserted, the return value is the index of the
84-
* function in FUNCS. Otherwise, the return value is the value of the parameter
85-
* @funcs_index.
68+
* @returns: The hash value of string.
8669
*/
87-
int insert_trie(trie_t *trie, char *name, int funcs_index)
88-
{
89-
char first_char;
90-
int fc;
91-
92-
while (1) {
93-
first_char = *name;
94-
fc = first_char;
95-
if (!fc) {
96-
if (!trie->index)
97-
trie->index = funcs_index;
98-
return trie->index;
99-
}
100-
if (!trie->next[fc]) {
101-
/* FIXME: The func_tries_idx variable may exceed the maximum number,
102-
* which can lead to a segmentation fault. This issue is affected by
103-
* the number of functions and the length of their names. The proper
104-
* way to handle this is to dynamically allocate a new element.
105-
*/
106-
trie->next[fc] = func_tries_idx++;
107-
for (int i = 0; i < 128; i++)
108-
FUNC_TRIES[trie->next[fc]].next[i] = 0;
109-
FUNC_TRIES[trie->next[fc]].index = 0;
110-
}
111-
trie = &FUNC_TRIES[trie->next[fc]];
112-
name++;
70+
/* Computes the 32-bit FNV-1a hash of the NUL-terminated string 'key' and
 * returns it reinterpreted as a signed int, so the result may be negative.
 * 'key' must not be NULL.
 *
 * NOTE(review): this relies on 'unsigned int'; confirm that every compiler
 * used to build this file supports it.
 */
int hash_fnv1a(char *key)
{
    /* The arithmetic is done on an unsigned value: the FNV offset basis does
     * not fit in a 32-bit int and the multiplication is meant to wrap, which
     * is undefined behavior for signed integers.
     */
    unsigned int hash = 0x811c9dc5u;

    for (; *key; key++) {
        /* Hash the raw byte value regardless of the signedness of char. */
        hash ^= (unsigned char) *key;
        /* Keep the value reduced modulo 2^32 even if unsigned int is wider. */
        hash = (hash * 0x01000193u) & 0xffffffffu;
    }

    /* Convert to int without relying on an out-of-range cast: values above
     * INT_MAX map onto the negative two's complement range.
     */
    if (hash > 0x7fffffffu)
        return -(int) (0xffffffffu - hash) - 1;

    return (int) hash;
}
81+
82+
/**
83+
* hash_index() - hashes a string and converts it into a
84+
* usable hashmap index.
85+
* @size: The number of buckets in the hashmap. Must be positive.
86+
* @key: The key string. Must not be NULL.
87+
*
88+
* @returns: The usable hashmap index.
89+
*/
90+
/* Maps 'key' to a bucket index in the range [0, size).
 * 'size' must be positive and 'key' must not be NULL.
 */
int hash_index(int size, char *key)
{
    /* Reduce first, then drop the sign. Taking the absolute value of the raw
     * hash overflows when the hash is INT_MIN and would produce a negative
     * index; the remainder always lies strictly between -size and size, so
     * negating it is safe. For every other hash value the result is the same
     * as abs(hash) % size.
     */
    int index = hash_fnv1a(key) % size;

    if (index < 0)
        index = -index;

    return index;
}
11595

11696
/**
117-
* find_trie() - search the index of the function name in the trie
118-
* @trie: A pointer to the trie where the name will be searched.
119-
* @name: The name to be searched.
97+
* create_hashmap() - creates a hashmap on heap.
98+
* @size: The initial bucket size of hashmap.
12099
*
121-
* Return: The index of the pointer to the func_t.
100+
* @returns: The pointer of created hashmap.
101+
*/
102+
hashmap_t *create_hashmap(int size)
103+
{
104+
hashmap_t *map = malloc(sizeof(hashmap_t));
105+
map->size = size;
106+
map->buckets = malloc(size * sizeof(node_t *));
107+
108+
for (int i = 0; i < map->size; i++)
109+
map->buckets[i] = 0;
110+
111+
return map;
112+
}
113+
114+
/**
115+
* new_node() - creates a hashmap node on heap.
116+
* @key: The key of node. Must not be NULL.
117+
* @val: The value of node. Could be NULL.
122118
*
123-
* 0 - the name not found.
124-
* otherwise - the index of the founded index in the trie array.
119+
* @returns: The pointer of created node.
120+
*/
121+
node_t *new_node(char *key, void *val)
122+
{
123+
int len = strlen(key);
124+
node_t *node = malloc(sizeof(node_t));
125+
node->key = calloc(len + 1, sizeof(char));
126+
strcpy(node->key, key);
127+
node->val = val;
128+
node->next = NULL;
129+
return node;
130+
}
131+
132+
/**
133+
* put_hashmap() - puts a key-value pair into given hashmap.
134+
* If key already contains a value, then replace it with new
135+
* value, the old value will be freed.
136+
* @map: The hashmap to be put into. Must not be NULL.
137+
* @key: The key string. Must not be NULL.
138+
* @val: The value pointer. May be NULL. This value's lifetime
139+
* is held by hashmap.
125140
*/
126-
int find_trie(trie_t *trie, char *name)
127-
{
128-
char first_char;
129-
int fc;
130-
131-
while (1) {
132-
first_char = *name;
133-
fc = first_char;
134-
if (!fc)
135-
return trie->index;
136-
if (!trie->next[fc])
137-
return 0;
138-
trie = &FUNC_TRIES[trie->next[fc]];
139-
name++;
141+
/* Stores 'val' under 'key' in 'map'.
 * If the key is already present its value is replaced and the previous value
 * is freed; otherwise a new node is appended to the key's bucket chain.
 * 'map' and 'key' must not be NULL; 'val' may be NULL.
 */
void put_hashmap(hashmap_t *map, char *key, void *val)
{
    int index = hash_index(map->size, key);
    node_t *cur = map->buckets[index];

    if (!cur) {
        map->buckets[index] = new_node(key, val);
        return;
    }

    for (;;) {
        if (!strcmp(cur->key, key)) {
            /* Existing entry: replace the value in place. A duplicate node
             * appended to the chain would never be found by get_hashmap(),
             * which returns the first match. Storing the same pointer again
             * must not free it.
             */
            if (cur->val != val)
                free(cur->val);
            cur->val = val;
            return;
        }

        if (!cur->next)
            break;
        cur = cur->next;
    }

    /* Key not present: 'cur' is the tail of the chain. */
    cur->next = new_node(key, val);

    /* TODO: Rehash if size exceeds size * load factor */
}
156+
157+
/**
158+
* get_hashmap() - gets value from hashmap from given key.
159+
* @map: The hashmap to be looked up. Must not be NULL.
160+
* @key: The key string. Must not be NULL.
161+
*
162+
* @returns: The look up result, if the key-value pair entry
163+
* exists, then returns its value's address, NULL otherwise.
164+
*/
165+
/* Looks 'key' up in 'map' and returns the value stored in the first node of
 * the key's bucket chain whose key compares equal, or NULL when no node
 * matches.
 */
void *get_hashmap(hashmap_t *map, char *key)
{
    node_t *entry;

    /* Only the chain selected by the key's hash can contain the key. */
    for (entry = map->buckets[hash_index(map->size, key)]; entry;
         entry = entry->next) {
        if (strcmp(entry->key, key) == 0)
            return entry->val;
    }

    return NULL;
}
179+
180+
/**
181+
* contains_hashmap() - checks if the key-value pair entry exists
182+
* from given key.
183+
* @map: The hashmap to be looked up. Must not be NULL.
184+
* @key: The key string. Must not be NULL.
185+
*
186+
* @returns: The look up result, if the key-value pair entry
187+
* exists, then returns true, false otherwise.
188+
*/
189+
/* Reports whether 'map' holds an entry for 'key'.
 * The bucket chain is searched by key rather than by testing the stored
 * value, so an entry whose value is NULL is still reported as present.
 * 'map' and 'key' must not be NULL.
 */
bool contains_hashmap(hashmap_t *map, char *key)
{
    node_t *cur = map->buckets[hash_index(map->size, key)];

    for (; cur; cur = cur->next) {
        if (!strcmp(cur->key, key))
            return true;
    }

    return false;
}
193+
194+
/**
195+
* free_hashmap() - frees the hashmap, this also frees key-value pair
196+
* entry's value.
197+
* @map: The hashmap to be freed. Must not be NULL.
198+
*/
199+
/* Releases 'map': for every node in every bucket the key copy and the stored
 * value are freed, then the bucket array and the map itself.
 */
void free_hashmap(hashmap_t *map)
{
    int bucket;

    for (bucket = 0; bucket < map->size; bucket++) {
        node_t *entry = map->buckets[bucket];

        while (entry) {
            /* Remember the successor before the node may be released. */
            node_t *following = entry->next;

            free(entry->key);
            free(entry->val);
            /* FIXME: Remove this if-clause will cause double free error.
             * NOTE(review): as written, the node at the head of bucket 0 is
             * never freed (only its key and value are), so it leaks. The
             * cause of the reported double free is not visible in this
             * function.
             */
            if (entry != map->buckets[0])
                free(entry);
            entry = following;
        }
    }

    free(map->buckets);
    free(map);
}
142218

143219
/* options */
@@ -318,12 +394,14 @@ int find_macro_param_src_idx(char *name, block_t *parent)
318394
func_t *add_func(char *name)
319395
{
320396
func_t *fn;
321-
int index = insert_trie(FUNC_TRIES, name, funcs_idx);
322-
if (index == funcs_idx) {
323-
fn = &FUNCS[funcs_idx++];
397+
if (contains_hashmap(FUNCS_MAP, name)) {
398+
fn = get_hashmap(FUNCS_MAP, name);
399+
} else {
400+
fn = malloc(sizeof(func_t));
401+
put_hashmap(FUNCS_MAP, name, fn);
324402
strcpy(fn->return_def.var_name, name);
325403
}
326-
fn = &FUNCS[index];
404+
327405
fn->stack_size = 4; /* starting point of stack */
328406
return fn;
329407
}
@@ -358,10 +436,7 @@ constant_t *find_constant(char alias[])
358436

359437
func_t *find_func(char func_name[])
360438
{
361-
int index = find_trie(FUNC_TRIES, func_name);
362-
if (index)
363-
return &FUNCS[index];
364-
return NULL;
439+
return get_hashmap(FUNCS_MAP, func_name);
365440
}
366441

367442
var_t *find_member(char token[], type_t *type)
@@ -597,8 +672,7 @@ void global_init()
597672
BLOCKS.head = NULL;
598673
BLOCKS.tail = NULL;
599674
MACROS = malloc(MAX_ALIASES * sizeof(macro_t));
600-
FUNCS = malloc(MAX_FUNCS * sizeof(func_t));
601-
FUNC_TRIES = malloc(MAX_FUNC_TRIES * sizeof(trie_t));
675+
FUNCS_MAP = create_hashmap(MAX_FUNCS);
602676
TYPES = malloc(MAX_TYPES * sizeof(type_t));
603677
GLOBAL_IR = malloc(MAX_GLOBAL_IR * sizeof(ph1_ir_t));
604678
PH1_IR = malloc(MAX_IR_INSTR * sizeof(ph1_ir_t));
@@ -616,7 +690,8 @@ void global_init()
616690
elf_section = malloc(MAX_SECTION);
617691

618692
/* set starting point of global stack manually */
619-
FUNCS[0].stack_size = 4;
693+
func_t *global_func = add_func("");
694+
global_func->stack_size = 4;
620695
}
621696

622697
void global_release()
@@ -627,8 +702,7 @@ void global_release()
627702
BLOCKS.head = next;
628703
}
629704
free(MACROS);
630-
free(FUNCS);
631-
free(FUNC_TRIES);
705+
free_hashmap(FUNCS_MAP);
632706
free(TYPES);
633707
free(GLOBAL_IR);
634708
free(PH1_IR);

0 commit comments

Comments
 (0)