@@ -12,18 +12,13 @@ block_list_t BLOCKS;
12
12
macro_t * MACROS ;
13
13
int macros_idx = 0 ;
14
14
15
- /* the first element is reserved for global scope */
16
- func_t * FUNCS ;
17
- int funcs_idx = 1 ;
18
-
19
- /* FUNC_TRIES is used to improve the performance of the find_func function.
20
- * Instead of searching through all functions and comparing their names, we can
21
- * utilize the trie data structure to search for existing functions efficiently.
22
- * The index starts from 1 because the first trie node represents an empty input
23
- * string, and it is not possible to record a function with an empty name.
15
+ /* FUNCS_MAP is used to integrate function storage and boost lookup
16
+ * performance, currently it uses FNV-1a hash function to hash function
17
+ * name. The bucket size defaults to MAX_FUNCS. Ideally, it should be a small
18
+ * number, but due to lack of rehashing implementation, to prevent collision,
19
+ * we have to initially create large amount of buckets.
24
20
*/
25
- trie_t * FUNC_TRIES ;
26
- int func_tries_idx = 1 ;
21
+ hashmap_t * FUNCS_MAP ;
27
22
28
23
type_t * TYPES ;
29
24
int types_idx = 0 ;
@@ -72,72 +67,143 @@ char *elf_strtab;
72
67
char * elf_section ;
73
68
74
69
/**
75
- * insert_trie() - Inserts a new element into the trie structure.
76
- * @trie: A pointer to the trie where the name will be inserted.
77
- * @name: The name to be inserted into the trie.
78
- * @funcs_index: The index of the pointer to the func_t. The index is recorded
79
- * in a 1-indexed format. Because the first element of 'FUNCS' has been
80
- * reserved, there is no need to shift it.
81
- * Return: The index of the pointer to the func_t.
70
+ * hash_index() - hashes a string with FNV-1a hash function
71
+ * and converts into usable hashmap index. The range of returned
72
+ * hashmap index is "(0 ~ 2,147,483,647) mod size" due to
73
+ * lack of unsigned integer implementation.
74
+ * @size: The size of map. Must not be negative or 0.
75
+ * @key: The key string. May be NULL.
82
76
*
83
- * If the function has been inserted, the return value is the index of the
84
- * function in FUNCS. Otherwise, the return value is the value of the parameter
85
- * @funcs_index.
77
+ * @returns: The usable hashmap index.
86
78
*/
87
- int insert_trie (trie_t * trie , char * name , int funcs_index )
88
- {
89
- char first_char ;
90
- int fc ;
91
-
92
- while (1 ) {
93
- first_char = * name ;
94
- fc = first_char ;
95
- if (!fc ) {
96
- if (!trie -> index )
97
- trie -> index = funcs_index ;
98
- return trie -> index ;
99
- }
100
- if (!trie -> next [fc ]) {
101
- /* FIXME: The func_tries_idx variable may exceed the maximum number,
102
- * which can lead to a segmentation fault. This issue is affected by
103
- * the number of functions and the length of their names. The proper
104
- * way to handle this is to dynamically allocate a new element.
105
- */
106
- trie -> next [fc ] = func_tries_idx ++ ;
107
- for (int i = 0 ; i < 128 ; i ++ )
108
- FUNC_TRIES [trie -> next [fc ]].next [i ] = 0 ;
109
- FUNC_TRIES [trie -> next [fc ]].index = 0 ;
110
- }
111
- trie = & FUNC_TRIES [trie -> next [fc ]];
112
- name ++ ;
79
/**
 * hash_index() - hashes a string with FNV-1a and maps it to a bucket index.
 * @size: The number of buckets. Must be positive.
 * @key: The key string. A NULL key is hashed like the empty string.
 *
 * Only signed 'int' arithmetic is used; the multiplication relies on 'int'
 * wrapping around like a 32-bit two's-complement value.
 *
 * @returns: A bucket index in the range [0, size).
 */
int hash_index(int size, char *key)
{
    /* FNV-1a 32-bit offset basis */
    int hash = 0x811c9dc5;

    /* A NULL key skips the loop entirely, so it hashes like "". */
    for (; key && *key; key++) {
        hash ^= *key;
        hash *= 0x01000193; /* FNV-1a 32-bit prime */
    }

    /* Clear the sign bit rather than negating the hash: negating the most
     * negative int yields a negative value again, which would turn into a
     * negative (out-of-range) index after the modulo.
     */
    return (hash & 0x7fffffff) % size;
}
91
+
92
+ /**
93
+ * hashmap_create() - creates a hashmap on heap.
94
+ * @size: The initial bucket size of hashmap. Must not be 0.
95
+ *
96
+ * @returns: The pointer of created hashmap.
97
+ */
98
+ hashmap_t * hashmap_create (int size )
99
+ {
100
+ hashmap_t * map = malloc (sizeof (hashmap_t ));
101
+ map -> size = size ;
102
+ map -> buckets = malloc (size * sizeof (hashmap_node_t * ));
103
+
104
+ for (int i = 0 ; i < map -> size ; i ++ )
105
+ map -> buckets [i ] = 0 ;
106
+
107
+ return map ;
108
+ }
109
+
110
+ /**
111
+ * hashmap_node_new() - creates a hashmap node on heap.
112
+ * @key: The key of node. Must not be NULL.
113
+ * @val: The value of node. Could be NULL.
114
+ *
115
+ * @returns: The pointer of created node.
116
+ */
117
+ hashmap_node_t * hashmap_node_new (char * key , void * val )
118
+ {
119
+ int len = strlen (key );
120
+ hashmap_node_t * node = malloc (sizeof (hashmap_node_t ));
121
+ node -> key = calloc (len + 1 , sizeof (char ));
122
+ strcpy (node -> key , key );
123
+ node -> val = val ;
124
+ node -> next = NULL ;
125
+ return node ;
126
+ }
127
+
128
+ /**
129
+ * hashmap_put() - puts a key-value pair into given hashmap.
130
+ * If key already contains a value, then replace it with new
131
+ * value, the old value will be freed.
132
+ * @map: The hashmap to be put into. Must not be NULL.
133
+ * @key: The key string. May be NULL.
134
+ * @val: The value pointer. May be NULL. This value's lifetime
135
+ * is held by hashmap.
136
+ */
137
+ void hashmap_put (hashmap_t * map , char * key , void * val )
138
+ {
139
+ int index = hash_index (map -> size , key );
140
+ hashmap_node_t * cur = map -> buckets [index ];
141
+
142
+ if (!cur ) {
143
+ map -> buckets [index ] = hashmap_node_new (key , val );
144
+ } else {
145
+ while (cur -> next )
146
+ cur = cur -> next ;
147
+ cur -> next = hashmap_node_new (key , val );
113
148
}
149
+
150
+ /* TODO: Rehash if size exceeds size * load factor */
114
151
}
115
152
116
153
/**
117
- * find_trie () - search the index of the function name in the trie
118
- * @trie: A pointer to the trie where the name will be searched .
119
- * @name : The name to be searched .
154
+ * hashmap_get () - gets value from hashmap from given key.
155
+ * @map: The hashmap to be looked up. Must not be NULL.
156
+ * @key : The key string. May be NULL .
120
157
*
121
- * Return: The index of the pointer to the func_t.
158
+ * @returns: The look up result, if the key-value pair entry
159
+ * exists, then returns its value's address, NULL otherwise.
160
+ */
161
+ void * hashmap_get (hashmap_t * map , char * key )
162
+ {
163
+ int index = hash_index (map -> size , key );
164
+
165
+ for (hashmap_node_t * cur = map -> buckets [index ]; cur ; cur = cur -> next )
166
+ if (!strcmp (cur -> key , key ))
167
+ return cur -> val ;
168
+
169
+ return NULL ;
170
+ }
171
+
172
+ /**
173
+ * hashmap_contains() - checks if the key-value pair entry exists
174
+ * from given key.
175
+ * @map: The hashmap to be looked up. Must not be NULL.
176
+ * @key: The key string. May be NULL.
122
177
*
123
- * 0 - the name not found.
124
- * otherwise - the index of the founded index in the trie array.
178
+ * @returns: The look up result, if the key-value pair entry
179
+ * exists, then returns true, false otherwise.
180
+ */
181
+ bool hashmap_contains (hashmap_t * map , char * key )
182
+ {
183
+ return hashmap_get (map , key );
184
+ }
185
+
186
+ /**
187
+ * hashmap_free() - frees the hashmap, this also frees key-value pair
188
+ * entry's value.
189
+ * @map: The hashmap to be freed. Must not be NULL.
125
190
*/
126
- int find_trie (trie_t * trie , char * name )
127
- {
128
- char first_char ;
129
- int fc ;
130
-
131
- while (1 ) {
132
- first_char = * name ;
133
- fc = first_char ;
134
- if (!fc )
135
- return trie -> index ;
136
- if (!trie -> next [fc ])
137
- return 0 ;
138
- trie = & FUNC_TRIES [trie -> next [fc ]];
139
- name ++ ;
191
+ void hashmap_free (hashmap_t * map )
192
+ {
193
+ for (int i = 0 ; i < map -> size ; i ++ ) {
194
+ for (hashmap_node_t * cur = map -> buckets [i ], * next ; cur ; cur = next ) {
195
+ next = cur -> next ;
196
+ free (cur -> key );
197
+ free (cur -> val );
198
+ /* FIXME: Remove this if-clause will cause double free error */
199
+ if (cur != map -> buckets [0 ])
200
+ free (cur );
201
+ cur = next ;
202
+ }
140
203
}
204
+
205
+ free (map -> buckets );
206
+ free (map );
141
207
}
142
208
143
209
/* options */
@@ -318,12 +384,14 @@ int find_macro_param_src_idx(char *name, block_t *parent)
318
384
func_t * add_func (char * name )
319
385
{
320
386
func_t * fn ;
321
- int index = insert_trie (FUNC_TRIES , name , funcs_idx );
322
- if (index == funcs_idx ) {
323
- fn = & FUNCS [funcs_idx ++ ];
387
+ if (hashmap_contains (FUNCS_MAP , name )) {
388
+ fn = hashmap_get (FUNCS_MAP , name );
389
+ } else {
390
+ fn = malloc (sizeof (func_t ));
391
+ hashmap_put (FUNCS_MAP , name , fn );
324
392
strcpy (fn -> return_def .var_name , name );
325
393
}
326
- fn = & FUNCS [ index ];
394
+
327
395
fn -> stack_size = 4 ; /* starting point of stack */
328
396
return fn ;
329
397
}
@@ -358,10 +426,7 @@ constant_t *find_constant(char alias[])
358
426
359
427
func_t * find_func (char func_name [])
360
428
{
361
- int index = find_trie (FUNC_TRIES , func_name );
362
- if (index )
363
- return & FUNCS [index ];
364
- return NULL ;
429
+ return hashmap_get (FUNCS_MAP , func_name );
365
430
}
366
431
367
432
var_t * find_member (char token [], type_t * type )
@@ -597,8 +662,7 @@ void global_init()
597
662
BLOCKS .head = NULL ;
598
663
BLOCKS .tail = NULL ;
599
664
MACROS = malloc (MAX_ALIASES * sizeof (macro_t ));
600
- FUNCS = malloc (MAX_FUNCS * sizeof (func_t ));
601
- FUNC_TRIES = malloc (MAX_FUNC_TRIES * sizeof (trie_t ));
665
+ FUNCS_MAP = hashmap_create (MAX_FUNCS );
602
666
TYPES = malloc (MAX_TYPES * sizeof (type_t ));
603
667
GLOBAL_IR = malloc (MAX_GLOBAL_IR * sizeof (ph1_ir_t ));
604
668
PH1_IR = malloc (MAX_IR_INSTR * sizeof (ph1_ir_t ));
@@ -616,7 +680,8 @@ void global_init()
616
680
elf_section = malloc (MAX_SECTION );
617
681
618
682
/* set starting point of global stack manually */
619
- FUNCS [0 ].stack_size = 4 ;
683
+ func_t * global_func = add_func ("" );
684
+ global_func -> stack_size = 4 ;
620
685
}
621
686
622
687
void global_release ()
@@ -627,8 +692,7 @@ void global_release()
627
692
BLOCKS .head = next ;
628
693
}
629
694
free (MACROS );
630
- free (FUNCS );
631
- free (FUNC_TRIES );
695
+ hashmap_free (FUNCS_MAP );
632
696
free (TYPES );
633
697
free (GLOBAL_IR );
634
698
free (PH1_IR );
0 commit comments