Skip to content

Commit 7a53b43

Browse files
authoredFeb 27, 2025··
Merge pull request #179 from ChAoSUnItY/refactor/hashmap
Replace func trie with hashmap
2 parents 09bb918 + c50f8c1 commit 7a53b43

File tree

3 files changed

+212
-82
lines changed

3 files changed

+212
-82
lines changed
 

‎Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
CC ?= gcc
22
CFLAGS := -O -g \
33
-std=c99 -pedantic \
4+
-fwrapv \
45
-Wall -Wextra \
56
-Wno-unused-but-set-variable \
67
-Wno-variadic-macros \

‎src/defs.h

+11-4
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
#define MAX_LOCALS 1500
2121
#define MAX_FIELDS 64
2222
#define MAX_FUNCS 512
23-
#define MAX_FUNC_TRIES 2160
2423
#define MAX_TYPES 64
2524
#define MAX_IR_INSTR 50000
2625
#define MAX_BB_PRED 128
@@ -305,10 +304,18 @@ typedef struct {
305304
int value;
306305
} constant_t;
307306

307+
/* string-based hash map definitions */
308+
309+
typedef struct hashmap_node {
310+
char *key;
311+
void *val;
312+
struct hashmap_node *next;
313+
} hashmap_node_t;
314+
308315
typedef struct {
309-
int index;
310-
int next[128];
311-
} trie_t;
316+
int size;
317+
hashmap_node_t **buckets;
318+
} hashmap_t;
312319

313320
struct phi_operand {
314321
var_t *var;

‎src/globals.c

+200-78
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,13 @@ block_list_t BLOCKS;
1515
macro_t *MACROS;
1616
int macros_idx = 0;
1717

18-
/* the first element is reserved for global scope */
19-
func_t *FUNCS;
20-
int funcs_idx = 1;
21-
22-
/* FUNC_TRIES is used to improve the performance of the find_func function.
23-
* Instead of searching through all functions and comparing their names, we can
24-
* utilize the trie data structure to search for existing functions efficiently.
25-
* The index starts from 1 because the first trie node represents an empty input
26-
* string, and it is not possible to record a function with an empty name.
18+
/* FUNCS_MAP is used to integrate function storing and boost lookup
19+
* performance, currently it uses FNV-1a hash function to hash function
20+
* name. The bucket size defaults to MAX_FUNCS. Ideally, it should be a small
21+
* number, but due to lack of rehashing implementation, to prevent collision,
22+
* we have to create a large number of buckets up front.
2723
*/
28-
trie_t *FUNC_TRIES;
29-
int func_tries_idx = 1;
24+
hashmap_t *FUNCS_MAP;
3025

3126
type_t *TYPES;
3227
int types_idx = 0;
@@ -75,72 +70,195 @@ char *elf_strtab;
7570
char *elf_section;
7671

7772
/**
78-
* insert_trie() - Inserts a new element into the trie structure.
79-
* @trie: A pointer to the trie where the name will be inserted.
80-
* @name: The name to be inserted into the trie.
81-
* @funcs_index: The index of the pointer to the func_t. The index is recorded
82-
* in a 1-indexed format. Because the first element of 'FUNCS' has been
83-
* reserved, there is no need to shift it.
84-
* Return: The index of the pointer to the func_t.
73+
* hashmap_hash_index() - hashes a string with FNV-1a hash function
74+
* and converts into usable hashmap index. The range of returned
75+
* hashmap index is ranged from "(0 ~ 2,147,483,647) mod size" due to
76+
* lack of unsigned integer implementation.
77+
* @size: The size of map. Must be a positive power of 2.
78+
* @key: The key string. May be NULL.
8579
*
86-
* If the function has been inserted, the return value is the index of the
87-
* function in FUNCS. Otherwise, the return value is the value of the parameter
88-
* @funcs_index.
80+
* Return: The usable hashmap index.
8981
*/
90-
int insert_trie(trie_t *trie, char *name, int funcs_index)
91-
{
92-
char first_char;
93-
int fc;
94-
95-
while (1) {
96-
first_char = *name;
97-
fc = first_char;
98-
if (!fc) {
99-
if (!trie->index)
100-
trie->index = funcs_index;
101-
return trie->index;
102-
}
103-
if (!trie->next[fc]) {
104-
/* FIXME: The func_tries_idx variable may exceed the maximum number,
105-
* which can lead to a segmentation fault. This issue is affected by
106-
* the number of functions and the length of their names. The proper
107-
* way to handle this is to dynamically allocate a new element.
108-
*/
109-
trie->next[fc] = func_tries_idx++;
110-
for (int i = 0; i < 128; i++)
111-
FUNC_TRIES[trie->next[fc]].next[i] = 0;
112-
FUNC_TRIES[trie->next[fc]].index = 0;
113-
}
114-
trie = &FUNC_TRIES[trie->next[fc]];
115-
name++;
82+
int hashmap_hash_index(int size, char *key)
{
    /* FNV-1a offset basis. The hash is kept in a signed int on purpose;
     * the multiplication below relies on wrap-around on overflow (the
     * build passes -fwrapv).
     */
    int hash = 0x811c9dc5, mask;

    /* A NULL key is hashed like the empty string instead of being
     * dereferenced.
     */
    if (key) {
        for (; *key; key++) {
            hash ^= *key;
            hash *= 0x01000193; /* FNV-1a prime */
        }
    }

    /* Branch-free absolute value: mask is -1 for a negative hash and 0
     * otherwise, so the expression yields -hash or hash respectively.
     */
    mask = hash >> 31;

    /* size - 1 only works as an index mask when size is a power of 2. */
    return ((hash ^ mask) - mask) & (size - 1);
}
94+
95+
/**
 * round_up_pow2() - rounds an integer up to the nearest power of 2.
 * @v: The value to be rounded. Values less than 1 are treated as 1.
 *     Must not exceed 2^30, otherwise the result does not fit in a
 *     signed 32-bit integer.
 *
 * Return: The smallest power of 2 that is not less than @v.
 */
int round_up_pow2(int v)
{
    /* Without this guard, 0 would round to 0 and negative values would
     * be right-shifted, neither of which is a usable bucket count.
     */
    if (v < 1)
        return 1;

    /* Smear the highest set bit of (v - 1) into every lower bit, then
     * add 1 to reach the next power of 2. Exact powers of 2 map to
     * themselves because of the initial decrement.
     */
    v--;
    v |= v >> 1;
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;
    v++;
    return v;
}
118106

119107
/**
120-
* find_trie() - search the index of the function name in the trie
121-
* @trie: A pointer to the trie where the name will be searched.
122-
* @name: The name to be searched.
108+
* hashmap_create() - creates a hashmap on heap. Notice that
109+
* provided size will always be rounded up to nearest power of 2.
110+
* @size: The initial bucket size of hashmap. Must not be 0 or
111+
* negative.
123112
*
124-
* Return: The index of the pointer to the func_t.
113+
* Return: The pointer of created hashmap.
114+
*/
115+
hashmap_t *hashmap_create(int size)
116+
{
117+
hashmap_t *map = malloc(sizeof(hashmap_t));
118+
119+
if (!map) {
120+
printf("Failed to allocate hashmap_t with size %d\n", size);
121+
return NULL;
122+
}
123+
124+
map->size = round_up_pow2(size);
125+
map->buckets = malloc(map->size * sizeof(hashmap_node_t *));
126+
127+
if (!map->buckets) {
128+
printf("Failed to allocate buckets in hashmap_t\n");
129+
free(map);
130+
return NULL;
131+
}
132+
133+
for (int i = 0; i < map->size; i++)
134+
map->buckets[i] = 0;
135+
136+
return map;
137+
}
138+
139+
/**
140+
* hashmap_node_new() - creates a hashmap node on heap.
141+
* @key: The key of node. Must not be NULL.
142+
* @val: The value of node. Could be NULL.
125143
*
126-
* 0 - the name not found.
127-
* otherwise - the index of the founded index in the trie array.
144+
* Return: The pointer of created node.
128145
*/
129-
int find_trie(trie_t *trie, char *name)
146+
hashmap_node_t *hashmap_node_new(char *key, void *val)
130147
{
131-
char first_char;
132-
int fc;
148+
if (!key)
149+
return NULL;
150+
151+
int len = strlen(key);
152+
hashmap_node_t *node = malloc(sizeof(hashmap_node_t));
133153

134-
while (1) {
135-
first_char = *name;
136-
fc = first_char;
137-
if (!fc)
138-
return trie->index;
139-
if (!trie->next[fc])
140-
return 0;
141-
trie = &FUNC_TRIES[trie->next[fc]];
142-
name++;
154+
155+
if (!node) {
156+
printf("Failed to allocate hashmap_node_t\n");
157+
return NULL;
143158
}
159+
160+
node->key = calloc(len + 1, sizeof(char));
161+
162+
if (!node->key) {
163+
printf("Failed to allocate hashmap_node_t key with size %d\n");
164+
free(node);
165+
return NULL;
166+
}
167+
168+
strcpy(node->key, key);
169+
node->val = val;
170+
node->next = NULL;
171+
return node;
172+
}
173+
174+
/**
175+
* hashmap_put() - puts a key-value pair into given hashmap.
176+
* If key already contains a value, then replace it with new
177+
* value, the old value will be freed.
178+
* @map: The hashmap to be put into. Must not be NULL.
179+
* @key: The key string. May be NULL.
180+
* @val: The value pointer. May be NULL. This value's lifetime
181+
* is held by hashmap.
182+
*/
183+
void hashmap_put(hashmap_t *map, char *key, void *val)
184+
{
185+
if (!map)
186+
return;
187+
188+
int index = hashmap_hash_index(map->size, key);
189+
hashmap_node_t *cur = map->buckets[index];
190+
191+
if (!cur) {
192+
map->buckets[index] = hashmap_node_new(key, val);
193+
} else {
194+
while (cur->next)
195+
cur = cur->next;
196+
cur->next = hashmap_node_new(key, val);
197+
}
198+
199+
/* TODO: Rehash if size exceeds size * load factor */
200+
}
201+
202+
/**
203+
* hashmap_get() - gets value from hashmap from given key.
204+
* @map: The hashmap to be looked up. Must no be NULL.
205+
* @key: The key string. May be NULL.
206+
*
207+
* Return: The look up result, if the key-value pair entry
208+
* exists, then returns its value's address, NULL otherwise.
209+
*/
210+
void *hashmap_get(hashmap_t *map, char *key)
211+
{
212+
if (!map)
213+
return NULL;
214+
215+
int index = hashmap_hash_index(map->size, key);
216+
217+
for (hashmap_node_t *cur = map->buckets[index]; cur; cur = cur->next)
218+
if (!strcmp(cur->key, key))
219+
return cur->val;
220+
221+
return NULL;
222+
}
223+
224+
/**
225+
* hashmap_contains() - checks if the key-value pair entry exists
226+
* from given key.
227+
* @map: The hashmap to be looked up. Must no be NULL.
228+
* @key: The key string. May be NULL.
229+
*
230+
* Return: The look up result, if the key-value pair entry
231+
* exists, then returns true, false otherwise.
232+
*/
233+
bool hashmap_contains(hashmap_t *map, char *key)
234+
{
235+
return hashmap_get(map, key);
236+
}
237+
238+
/**
239+
* hashmap_free() - frees the hashmap, this also frees key-value pair
240+
* entry's value.
241+
* @map: The hashmap to be looked up. Must no be NULL.
242+
*/
243+
void hashmap_free(hashmap_t *map)
244+
{
245+
if (!map)
246+
return;
247+
248+
for (int i = 0; i < map->size; i++) {
249+
for (hashmap_node_t *cur = map->buckets[i], *next; cur; cur = next) {
250+
next = cur->next;
251+
free(cur->key);
252+
free(cur->val);
253+
/* FIXME: Remove this if-clause will cause double free error */
254+
if (cur != map->buckets[0])
255+
free(cur);
256+
cur = next;
257+
}
258+
}
259+
260+
free(map->buckets);
261+
free(map);
144262
}
145263

146264
/* options */
@@ -321,12 +439,20 @@ int find_macro_param_src_idx(char *name, block_t *parent)
321439
func_t *add_func(char *name)
322440
{
323441
func_t *fn;
324-
int index = insert_trie(FUNC_TRIES, name, funcs_idx);
325-
if (index == funcs_idx) {
326-
fn = &FUNCS[funcs_idx++];
442+
if (hashmap_contains(FUNCS_MAP, name)) {
443+
fn = hashmap_get(FUNCS_MAP, name);
444+
} else {
445+
fn = malloc(sizeof(func_t));
446+
447+
if (!fn) {
448+
printf("Failed to allocate func_t\n");
449+
return NULL;
450+
}
451+
452+
hashmap_put(FUNCS_MAP, name, fn);
327453
strcpy(fn->return_def.var_name, name);
328454
}
329-
fn = &FUNCS[index];
455+
330456
fn->stack_size = 4; /* starting point of stack */
331457
return fn;
332458
}
@@ -361,10 +487,7 @@ constant_t *find_constant(char alias[])
361487

362488
func_t *find_func(char func_name[])
363489
{
364-
int index = find_trie(FUNC_TRIES, func_name);
365-
if (index)
366-
return &FUNCS[index];
367-
return NULL;
490+
return hashmap_get(FUNCS_MAP, func_name);
368491
}
369492

370493
var_t *find_member(char token[], type_t *type)
@@ -600,8 +723,7 @@ void global_init()
600723
BLOCKS.head = NULL;
601724
BLOCKS.tail = NULL;
602725
MACROS = malloc(MAX_ALIASES * sizeof(macro_t));
603-
FUNCS = malloc(MAX_FUNCS * sizeof(func_t));
604-
FUNC_TRIES = malloc(MAX_FUNC_TRIES * sizeof(trie_t));
726+
FUNCS_MAP = hashmap_create(MAX_FUNCS);
605727
TYPES = malloc(MAX_TYPES * sizeof(type_t));
606728
GLOBAL_IR = malloc(MAX_GLOBAL_IR * sizeof(ph1_ir_t));
607729
PH1_IR = malloc(MAX_IR_INSTR * sizeof(ph1_ir_t));
@@ -619,7 +741,8 @@ void global_init()
619741
elf_section = malloc(MAX_SECTION);
620742

621743
/* set starting point of global stack manually */
622-
FUNCS[0].stack_size = 4;
744+
func_t *global_func = add_func("");
745+
global_func->stack_size = 4;
623746
}
624747

625748
void global_release()
@@ -630,8 +753,7 @@ void global_release()
630753
BLOCKS.head = next;
631754
}
632755
free(MACROS);
633-
free(FUNCS);
634-
free(FUNC_TRIES);
756+
hashmap_free(FUNCS_MAP);
635757
free(TYPES);
636758
free(GLOBAL_IR);
637759
free(PH1_IR);

0 commit comments

Comments
 (0)
Please sign in to comment.