Skip to content

Commit 4bd95f8

Browse files
committed
tools: Add C middle-processor to make builtin tables proper hash tables.
1 parent 521759e commit 4bd95f8

File tree

1 file changed

+262
-0
lines changed

1 file changed

+262
-0
lines changed

tools/cc1

+262
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,262 @@
1+
#!/usr/bin/env python3
2+
3+
"""
4+
This is a middle-processor for MicroPython source files. It takes the output
5+
of the C preprocessor, has the option to change it, then feeds this into the
6+
C compiler.
7+
8+
It currently has the ability to reorder static hash tables so they are actually
9+
hashed, resulting in faster lookup times at runtime.
10+
11+
To use, configure the Python variables below, and add the following line to the
12+
Makefile:
13+
14+
CFLAGS += -no-integrated-cpp -B$(shell pwd)/../tools
15+
"""
16+
17+
import sys
18+
import os
19+
import re
20+
21+
################################################################################
22+
# these are the configuration variables
23+
# TODO somehow make them externally configurable
24+
25+
# this is the path to the true C compiler
26+
cc1_path = '/usr/lib/gcc/x86_64-unknown-linux-gnu/5.3.0/cc1'
27+
#cc1_path = '/usr/lib/gcc/arm-none-eabi/5.3.0/cc1'
28+
29+
# this must be the same as MICROPY_QSTR_BYTES_IN_HASH
30+
bytes_in_qstr_hash = 2
31+
32+
# this must be 1 or more (can be a decimal)
33+
# larger uses more code size but yields faster lookups
34+
table_size_mult = 1
35+
36+
# these control output during processing
37+
print_stats = True
38+
print_debug = False
39+
40+
# end configuration variables
41+
################################################################################
42+
43+
# precompile regexs
44+
re_preproc_line = re.compile(r'# [0-9]+ ')
45+
re_map_entry = re.compile(r'\{.+?\(MP_QSTR_([A-Za-z0-9_]+)\).+\},')
46+
re_mp_obj_dict_t = re.compile(r'(?P<head>(static )?const mp_obj_dict_t (?P<id>[a-z0-9_]+) = \{ \.base = \{&mp_type_dict\}, \.map = \{ \.all_keys_are_qstrs = 1, \.is_fixed = 1, \.is_ordered = )1(?P<tail>, \.used = .+ };)$')
47+
re_mp_map_t = re.compile(r'(?P<head>(static )?const mp_map_t (?P<id>[a-z0-9_]+) = \{ \.all_keys_are_qstrs = 1, \.is_fixed = 1, \.is_ordered = )1(?P<tail>, \.used = .+ };)$')
48+
re_mp_rom_map_elem_t = re.compile(r'static const mp_rom_map_elem_t [a-z_0-9]+\[\] = {$')
49+
50+
# this must match the equivalent function in qstr.c
51+
def compute_hash(qstr):
52+
hash = 5381
53+
for char in qstr:
54+
hash = (hash * 33) ^ ord(char)
55+
# Make sure that valid hash is never zero, zero means "hash not computed"
56+
return (hash & ((1 << (8 * bytes_in_qstr_hash)) - 1)) or 1
57+
58+
# this algo must match the equivalent in map.c
59+
def hash_insert(map, key, value):
60+
hash = compute_hash(key)
61+
pos = hash % len(map)
62+
start_pos = pos
63+
if print_debug:
64+
print(' insert %s: start at %u/%u -- ' % (key, pos, len(map)), end='')
65+
while True:
66+
if map[pos] is None:
67+
# found empty slot, so key is not in table
68+
if print_debug:
69+
print('put at %u' % pos)
70+
map[pos] = (key, value)
71+
return
72+
else:
73+
# not yet found, keep searching
74+
if map[pos][0] == key:
75+
raise AssertionError("duplicate key '%s'" % (key,))
76+
pos = (pos + 1) % len(map)
77+
assert pos != start_pos
78+
79+
def hash_find(map, key):
80+
hash = compute_hash(key)
81+
pos = hash % len(map)
82+
start_pos = pos
83+
attempts = 0
84+
while True:
85+
attempts += 1
86+
if map[pos] is None:
87+
return attempts, None
88+
elif map[pos][0] == key:
89+
return attempts, map[pos][1]
90+
else:
91+
pos = (pos + 1) % len(map)
92+
if pos == start_pos:
93+
return attempts, None
94+
95+
def process_map_table(file, line, output):
96+
output.append(line)
97+
98+
# consume all lines that are entries of the table and concat them
99+
# (we do it this way because there can be multiple entries on one line)
100+
table_contents = []
101+
while True:
102+
line = file.readline()
103+
if len(line) == 0:
104+
print('unexpected end of input')
105+
sys.exit(1)
106+
line = line.strip()
107+
if len(line) == 0:
108+
# empty line
109+
continue
110+
if re_preproc_line.match(line):
111+
# preprocessor line number comment
112+
continue
113+
if line == '};':
114+
# end of table (we assume it appears on a single line)
115+
break
116+
table_contents.append(line)
117+
118+
# make combined string of entries
119+
entries_str = ''.join(table_contents)
120+
121+
# split into individual entries
122+
entries = []
123+
while entries_str:
124+
# look for single entry, by matching nested braces
125+
match = None
126+
if entries_str[0] == '{':
127+
nested_braces = 0
128+
for i in range(len(entries_str)):
129+
if entries_str[i] == '{':
130+
nested_braces += 1
131+
elif entries_str[i] == '}':
132+
nested_braces -= 1
133+
if nested_braces == 0:
134+
match = re_map_entry.match(entries_str[:i + 2])
135+
break
136+
137+
if not match:
138+
print('unknown line in table:', entries_str)
139+
sys.exit(1)
140+
141+
# extract single entry
142+
line = match.group(0)
143+
qstr = match.group(1)
144+
entries_str = entries_str[len(line):].lstrip()
145+
146+
# add the qstr and the whole line to list of all entries
147+
entries.append((qstr, line))
148+
149+
# sort entries so hash table construction is deterministic
150+
entries.sort()
151+
152+
# create hash table
153+
map = [None] * int(len(entries) * table_size_mult)
154+
for qstr, line in entries:
155+
# We assume that qstr does not have any escape sequences in it.
156+
# This is reasonably safe, since keys in a module or class dict
157+
# should be standard identifiers.
158+
# TODO verify this and raise an error if escape sequence found
159+
hash_insert(map, qstr, line)
160+
161+
# compute statistics
162+
total_attempts = 0
163+
for qstr, _ in entries:
164+
attempts, line = hash_find(map, qstr)
165+
assert line is not None
166+
if print_debug:
167+
print(' %s lookup took %u attempts' % (qstr, attempts))
168+
total_attempts += attempts
169+
if len(entries):
170+
stats = len(map), len(entries) / len(map), total_attempts / len(entries)
171+
else:
172+
stats = 0, 0, 0
173+
if print_debug:
174+
print(' table stats: size=%d, load=%.2f, avg_lookups=%.1f' % stats)
175+
176+
# output hash table
177+
for row in map:
178+
if row is None:
179+
output.append('{ 0, 0 },\n')
180+
else:
181+
output.append(row[1] + '\n')
182+
output.append('};\n')
183+
184+
# skip to next non-blank line
185+
while True:
186+
line = file.readline()
187+
if len(line) == 0:
188+
print('unexpected end of input')
189+
sys.exit(1)
190+
line = line.strip()
191+
if len(line) == 0:
192+
continue
193+
break
194+
195+
# transform the is_ordered param from 1 to 0
196+
match = re_mp_obj_dict_t.match(line)
197+
if match is None:
198+
match = re_mp_map_t.match(line)
199+
if match is None:
200+
print('expecting mp_obj_dict_t or mp_map_t definition')
201+
print(output[0])
202+
print(line)
203+
sys.exit(1)
204+
line = match.group('head') + '0' + match.group('tail') + '\n'
205+
output.append(line)
206+
207+
return (match.group('id'),) + stats
208+
209+
def process_file(filename):
210+
output = []
211+
file_changed = False
212+
with open(filename, 'rt') as f:
213+
while True:
214+
line = f.readline()
215+
if not line:
216+
break
217+
if re_mp_rom_map_elem_t.match(line):
218+
file_changed = True
219+
stats = process_map_table(f, line, output)
220+
if print_stats:
221+
print(' [%s: size=%d, load=%.2f, avg_lookups=%.1f]' % stats)
222+
else:
223+
output.append(line)
224+
225+
if file_changed:
226+
if print_debug:
227+
print(' modifying static maps in', output[0].strip())
228+
with open(filename, 'wt') as f:
229+
for line in output:
230+
f.write(line)
231+
232+
def main():
233+
# run actual C compiler
234+
# need to quote args that have special characters in them
235+
def quote(s):
236+
if s.find('<') != -1 or s.find('>') != -1:
237+
return "'" + s + "'"
238+
else:
239+
return s
240+
ret = os.system(cc1_path + ' ' + ' '.join(quote(s) for s in sys.argv[1:]))
241+
if ret != 0:
242+
ret = (ret & 0x7f) or 127 # make it in range 0-127, but non-zero
243+
sys.exit(ret)
244+
245+
if sys.argv[1] == '-E':
246+
# CPP has been run, now do our processing stage
247+
for i, arg in enumerate(sys.argv):
248+
if arg == '-o':
249+
return process_file(sys.argv[i + 1])
250+
251+
print('%s: could not find "-o" option' % (sys.argv[0],))
252+
sys.exit(1)
253+
elif sys.argv[1] == '-fpreprocessed':
254+
# compiler has been run, nothing more to do
255+
return
256+
else:
257+
# unknown processing stage
258+
print('%s: unknown first option "%s"' % (sys.argv[0], sys.argv[1]))
259+
sys.exit(1)
260+
261+
if __name__ == '__main__':
262+
main()

0 commit comments

Comments
 (0)