@@ -15,18 +15,13 @@ block_list_t BLOCKS;
15
15
macro_t * MACROS ;
16
16
int macros_idx = 0 ;
17
17
18
- /* the first element is reserved for global scope */
19
- func_t * FUNCS ;
20
- int funcs_idx = 1 ;
21
-
22
- /* FUNC_TRIES is used to improve the performance of the find_func function.
23
- * Instead of searching through all functions and comparing their names, we can
24
- * utilize the trie data structure to search for existing functions efficiently.
25
- * The index starts from 1 because the first trie node represents an empty input
26
- * string, and it is not possible to record a function with an empty name.
18
+ /* FUNCS_MAP is used to integrate function storage and boost lookup
19
+ * performance, currently it uses FNV-1a hash function to hash function
20
+ * name. The bucket size defaults to MAX_FUNCS. Ideally, it should be a small
21
+ * number, but due to lack of rehashing implementation, to prevent collision,
22
+ * we have to initially create large amount of buckets.
27
23
*/
28
- trie_t * FUNC_TRIES ;
29
- int func_tries_idx = 1 ;
24
+ hashmap_t * FUNCS_MAP ;
30
25
31
26
type_t * TYPES ;
32
27
int types_idx = 0 ;
@@ -75,72 +70,185 @@ char *elf_strtab;
75
70
char * elf_section ;
76
71
77
72
/**
78
- * insert_trie() - Inserts a new element into the trie structure.
79
- * @trie: A pointer to the trie where the name will be inserted.
80
- * @name: The name to be inserted into the trie.
81
- * @funcs_index: The index of the pointer to the func_t. The index is recorded
82
- * in a 1-indexed format. Because the first element of 'FUNCS' has been
83
- * reserved, there is no need to shift it.
84
- * Return: The index of the pointer to the func_t.
73
+ * hashmap_hash_index() - hashes a string with FNV-1a hash function
74
+ * and converts into usable hashmap index. The range of returned
75
+ * hashmap index is ranged from "(0 ~ 2,147,483,647) mod size" due to
76
+ * lack of unsigned integer implementation.
77
+ * @size: The size of map. Must not be negative or 0.
78
+ * @key: The key string. May be NULL.
85
79
*
86
- * If the function has been inserted, the return value is the index of the
87
- * function in FUNCS. Otherwise, the return value is the value of the parameter
88
- * @funcs_index.
80
+ * Return: The usable hashmap index.
89
81
*/
90
- int insert_trie (trie_t * trie , char * name , int funcs_index )
91
- {
92
- char first_char ;
93
- int fc ;
94
-
95
- while (1 ) {
96
- first_char = * name ;
97
- fc = first_char ;
98
- if (!fc ) {
99
- if (!trie -> index )
100
- trie -> index = funcs_index ;
101
- return trie -> index ;
102
- }
103
- if (!trie -> next [fc ]) {
104
- /* FIXME: The func_tries_idx variable may exceed the maximum number,
105
- * which can lead to a segmentation fault. This issue is affected by
106
- * the number of functions and the length of their names. The proper
107
- * way to handle this is to dynamically allocate a new element.
108
- */
109
- trie -> next [fc ] = func_tries_idx ++ ;
110
- for (int i = 0 ; i < 128 ; i ++ )
111
- FUNC_TRIES [trie -> next [fc ]].next [i ] = 0 ;
112
- FUNC_TRIES [trie -> next [fc ]].index = 0 ;
113
- }
114
- trie = & FUNC_TRIES [trie -> next [fc ]];
115
- name ++ ;
82
int hashmap_hash_index(int size, char *key)
{
    /* FNV-1a offset basis and prime, kept in a signed int because the
     * target C subset has no unsigned type; the arithmetic below relies on
     * 32-bit wrap-around.
     */
    int hash = 0x811c9dc5, mask;

    /* A NULL key is hashed like the empty string instead of being
     * dereferenced.
     */
    if (key) {
        for (; *key; key++) {
            hash ^= *key;
            hash *= 0x01000193;
        }
    }

    /* Branch-free absolute value: mask is -1 for a negative hash, else 0. */
    mask = hash >> 31;

    /* "& (size - 1)" equals "mod size" only when size is a power of two. */
    return ((hash ^ mask) - mask) & (size - 1);
}
94
+
95
/**
 * round_up_pow2() - rounds an integer up to the nearest power of 2.
 * @v: The value to be rounded.
 *
 * Values below 1 yield 1, so the result is always usable as a bucket
 * count. Values above 2^30 cannot be rounded up within a signed 32-bit
 * int and are clamped to 2^30 instead of overflowing.
 *
 * Return: The smallest power of 2 that is >= @v, within [1, 2^30].
 */
int round_up_pow2(int v)
{
    if (v <= 1)
        return 1;
    if (v > 0x40000000)
        return 0x40000000;

    /* Smear the highest set bit of (v - 1) into every lower bit, then add
     * one to reach the next power of 2.
     */
    v--;
    v |= v >> 1;
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;
    v++;
    return v;
}
118
106
119
107
/**
120
- * find_trie() - search the index of the function name in the trie
121
- * @trie: A pointer to the trie where the name will be searched.
122
- * @name: The name to be searched.
108
+ * hashmap_create() - creates a hashmap on heap. Notice that
109
+ * provided size will always be rounded up to nearest power of 2.
110
+ * @size: The initial bucket size of hashmap. Must not be 0 or
111
+ * negative.
123
112
*
124
- * Return: The index of the pointer to the func_t.
113
+ * Return: The pointer of created hashmap.
114
+ */
115
+ hashmap_t * hashmap_create (int size )
116
+ {
117
+ hashmap_t * map = malloc (sizeof (hashmap_t ));
118
+
119
+ if (!map ) {
120
+ printf ("Failed to allocate hashmap_t with size %d\n" , size );
121
+ return NULL ;
122
+ }
123
+
124
+ map -> size = round_up_pow2 (size );
125
+ map -> buckets = malloc (map -> size * sizeof (hashmap_node_t * ));
126
+
127
+ if (!map -> buckets ) {
128
+ printf ("Failed to allocate buckets in hashmap_t\n" );
129
+ return NULL ;
130
+ }
131
+
132
+ for (int i = 0 ; i < map -> size ; i ++ )
133
+ map -> buckets [i ] = 0 ;
134
+
135
+ return map ;
136
+ }
137
+
138
+ /**
139
+ * hashmap_node_new() - creates a hashmap node on heap.
140
+ * @key: The key of node. Must not be NULL.
141
+ * @val: The value of node. Could be NULL.
125
142
*
126
- * 0 - the name not found.
127
- * otherwise - the index of the founded index in the trie array.
143
+ * Return: The pointer of created node.
128
144
*/
129
- int find_trie ( trie_t * trie , char * name )
145
+ hashmap_node_t * hashmap_node_new ( char * key , void * val )
130
146
{
131
- char first_char ;
132
- int fc ;
147
+ if (!key )
148
+ return NULL ;
149
+
150
+ int len = strlen (key );
151
+ hashmap_node_t * node = malloc (sizeof (hashmap_node_t ));
152
+
133
153
134
- while (1 ) {
135
- first_char = * name ;
136
- fc = first_char ;
137
- if (!fc )
138
- return trie -> index ;
139
- if (!trie -> next [fc ])
140
- return 0 ;
141
- trie = & FUNC_TRIES [trie -> next [fc ]];
142
- name ++ ;
154
+ if (!node ) {
155
+ printf ("Failed to allocate hashmap_node_t\n" );
156
+ return NULL ;
143
157
}
158
+
159
+ node -> key = calloc (len + 1 , sizeof (char ));
160
+
161
+ if (!node -> key ) {
162
+ printf ("Failed to allocate hashmap_node_t key with size %d\n" );
163
+ free (node );
164
+ return NULL ;
165
+ }
166
+
167
+ strcpy (node -> key , key );
168
+ node -> val = val ;
169
+ node -> next = NULL ;
170
+ return node ;
171
+ }
172
+
173
+ /**
174
+ * hashmap_put() - puts a key-value pair into given hashmap.
175
+ * If key already contains a value, then replace it with new
176
+ * value, the old value will be freed.
177
+ * @map: The hashmap to be put into. Must not be NULL.
178
+ * @key: The key string. May be NULL.
179
+ * @val: The value pointer. May be NULL. This value's lifetime
180
+ * is held by hashmap.
181
+ */
182
+ void hashmap_put (hashmap_t * map , char * key , void * val )
183
+ {
184
+ int index = hashmap_hash_index (map -> size , key );
185
+ hashmap_node_t * cur = map -> buckets [index ];
186
+
187
+ if (!cur ) {
188
+ map -> buckets [index ] = hashmap_node_new (key , val );
189
+ } else {
190
+ while (cur -> next )
191
+ cur = cur -> next ;
192
+ cur -> next = hashmap_node_new (key , val );
193
+ }
194
+
195
+ /* TODO: Rehash if size exceeds size * load factor */
196
+ }
197
+
198
+ /**
199
+ * hashmap_get() - gets value from hashmap from given key.
200
+ * @map: The hashmap to be looked up. Must no be NULL.
201
+ * @key: The key string. May be NULL.
202
+ *
203
+ * Return: The look up result, if the key-value pair entry
204
+ * exists, then returns its value's address, NULL otherwise.
205
+ */
206
+ void * hashmap_get (hashmap_t * map , char * key )
207
+ {
208
+ int index = hashmap_hash_index (map -> size , key );
209
+
210
+ for (hashmap_node_t * cur = map -> buckets [index ]; cur ; cur = cur -> next )
211
+ if (!strcmp (cur -> key , key ))
212
+ return cur -> val ;
213
+
214
+ return NULL ;
215
+ }
216
+
217
+ /**
218
+ * hashmap_contains() - checks if the key-value pair entry exists
219
+ * from given key.
220
+ * @map: The hashmap to be looked up. Must no be NULL.
221
+ * @key: The key string. May be NULL.
222
+ *
223
+ * Return: The look up result, if the key-value pair entry
224
+ * exists, then returns true, false otherwise.
225
+ */
226
+ bool hashmap_contains (hashmap_t * map , char * key )
227
+ {
228
+ return hashmap_get (map , key );
229
+ }
230
+
231
+ /**
232
+ * hashmap_free() - frees the hashmap, this also frees key-value pair
233
+ * entry's value.
234
+ * @map: The hashmap to be looked up. Must no be NULL.
235
+ */
236
+ void hashmap_free (hashmap_t * map )
237
+ {
238
+ for (int i = 0 ; i < map -> size ; i ++ ) {
239
+ for (hashmap_node_t * cur = map -> buckets [i ], * next ; cur ; cur = next ) {
240
+ next = cur -> next ;
241
+ free (cur -> key );
242
+ free (cur -> val );
243
+ /* FIXME: Remove this if-clause will cause double free error */
244
+ if (cur != map -> buckets [0 ])
245
+ free (cur );
246
+ cur = next ;
247
+ }
248
+ }
249
+
250
+ free (map -> buckets );
251
+ free (map );
144
252
}
145
253
146
254
/* options */
@@ -321,12 +429,20 @@ int find_macro_param_src_idx(char *name, block_t *parent)
321
429
func_t * add_func (char * name )
322
430
{
323
431
func_t * fn ;
324
- int index = insert_trie (FUNC_TRIES , name , funcs_idx );
325
- if (index == funcs_idx ) {
326
- fn = & FUNCS [funcs_idx ++ ];
432
+ if (hashmap_contains (FUNCS_MAP , name )) {
433
+ fn = hashmap_get (FUNCS_MAP , name );
434
+ } else {
435
+ fn = malloc (sizeof (func_t ));
436
+
437
+ if (!fn ) {
438
+ printf ("Failed to allocate func_t\n" );
439
+ return NULL ;
440
+ }
441
+
442
+ hashmap_put (FUNCS_MAP , name , fn );
327
443
strcpy (fn -> return_def .var_name , name );
328
444
}
329
- fn = & FUNCS [ index ];
445
+
330
446
fn -> stack_size = 4 ; /* starting point of stack */
331
447
return fn ;
332
448
}
@@ -361,10 +477,7 @@ constant_t *find_constant(char alias[])
361
477
362
478
func_t * find_func (char func_name [])
363
479
{
364
- int index = find_trie (FUNC_TRIES , func_name );
365
- if (index )
366
- return & FUNCS [index ];
367
- return NULL ;
480
+ return hashmap_get (FUNCS_MAP , func_name );
368
481
}
369
482
370
483
var_t * find_member (char token [], type_t * type )
@@ -600,8 +713,11 @@ void global_init()
600
713
BLOCKS .head = NULL ;
601
714
BLOCKS .tail = NULL ;
602
715
MACROS = malloc (MAX_ALIASES * sizeof (macro_t ));
603
- FUNCS = malloc (MAX_FUNCS * sizeof (func_t ));
604
- FUNC_TRIES = malloc (MAX_FUNC_TRIES * sizeof (trie_t ));
716
+ FUNCS_MAP = hashmap_create (MAX_FUNCS );
717
+
718
+ if (!FUNCS_MAP )
719
+ printf ("Failed to initialized FUNCS_MAP\n" );
720
+
605
721
TYPES = malloc (MAX_TYPES * sizeof (type_t ));
606
722
GLOBAL_IR = malloc (MAX_GLOBAL_IR * sizeof (ph1_ir_t ));
607
723
PH1_IR = malloc (MAX_IR_INSTR * sizeof (ph1_ir_t ));
@@ -619,7 +735,8 @@ void global_init()
619
735
elf_section = malloc (MAX_SECTION );
620
736
621
737
/* set starting point of global stack manually */
622
- FUNCS [0 ].stack_size = 4 ;
738
+ func_t * global_func = add_func ("" );
739
+ global_func -> stack_size = 4 ;
623
740
}
624
741
625
742
void global_release ()
@@ -630,8 +747,10 @@ void global_release()
630
747
BLOCKS .head = next ;
631
748
}
632
749
free (MACROS );
633
- free (FUNCS );
634
- free (FUNC_TRIES );
750
+
751
+ if (FUNCS_MAP )
752
+ hashmap_free (FUNCS_MAP );
753
+
635
754
free (TYPES );
636
755
free (GLOBAL_IR );
637
756
free (PH1_IR );
0 commit comments