Skip to content

Commit 3429de6

Browse files
author
_
committed
speed up special label matching
1 parent 3456b21 commit 3429de6

File tree

1 file changed

+60
-12
lines changed

1 file changed

+60
-12
lines changed

pfa.c

Lines changed: 60 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -186,14 +186,68 @@ static int isoptype(char c) {
186186
}
187187

188188
/* import keyword; print(*keyword.kwlist) */
189-
/* todo: build fast detection state machine, testing inclusion is 30% */
190189
/*
 * NULL-terminated list of reserved words. Every entry consists only of the
 * letters 'a'..'z'; make_special_name_table() relies on that when it maps
 * characters to table columns.
 */
static const char *specnames[] = {
    "and", "as", "assert",
    "break",
    "class", "continue",
    "def", "del",
    "elif", "else", "except",
    "finally", "for", "from",
    "global",
    "if", "import", "in", "is",
    "lambda",
    "nonlocal", "not",
    "or",
    "pass",
    "raise", "return",
    "try",
    "while", "with",
    "yield",
    NULL,
};

/* State transition table, 26 columns per state; NULL until
 * make_special_name_table() has run. */
static int *spectable = NULL;

/* Per-state flag marking states that complete a word from specnames; NULL
 * until make_special_name_table() has run. */
static int *terminal = NULL;
198+
static void make_special_name_table() {
199+
/* string tree uses least memory; this is simpler to debug */
200+
int ncodes = 0;
201+
int nstates = 0;
202+
for (int i = 0; specnames[i]; i++) {
203+
nstates += strlen(specnames[i]) + 1;
204+
ncodes++;
205+
}
206+
spectable = (int *)malloc(sizeof(int) * 26 * nstates);
207+
terminal = (int *)malloc(sizeof(int) * nstates);
208+
/* by default all paths lead one to failure */
209+
for (int i = 0; i < 26 * nstates; i++) {
210+
spectable[i] = -1;
211+
}
212+
for (int i = 0; i < nstates; i++) {
213+
terminal[i] = 0;
214+
}
215+
/* fill in table, reusing old paths if available */
216+
int gstate = 1;
217+
for (int i = 0; i < ncodes; i++) {
218+
const char *s = specnames[i];
219+
int cstate = 0;
220+
for (int k = 0; s[k]; k++) {
221+
int cc = s[k] - 'a';
222+
if (spectable[26 * cstate + cc] == -1) {
223+
spectable[26 * cstate + cc] = gstate;
224+
cstate = gstate;
225+
gstate++;
226+
} else {
227+
cstate = spectable[26 * cstate + cc];
228+
}
229+
}
230+
terminal[cstate] = 1;
231+
}
232+
}
233+
/*
 * Release the tables allocated by make_special_name_table().
 *
 * Both file-scope pointers are reset to NULL afterwards, so calling this
 * more than once (or before the tables were ever built) is a harmless no-op
 * instead of a double free.
 */
static void free_special_name_table(void) {
  free(spectable);
  spectable = NULL;
  free(terminal);
  terminal = NULL;
}
237+
static int is_special_name(const char *tst) {
238+
int fcode = 0;
239+
for (int k = 0; tst[k]; k++) {
240+
if (tst[k] < 'a' || tst[k] > 'z') {
241+
return 0;
242+
}
243+
fcode = spectable[26 * fcode + (tst[k] - 'a')];
244+
if (fcode == -1) {
245+
return 0;
246+
}
247+
}
248+
return terminal[fcode];
249+
}
250+
197251
static void pyformat(FILE *file, FILE *out, struct vlbuf *origfile,
198252
struct vlbuf *formfile) {
199253
struct vlbuf linebuf = vlbuf_make(sizeof(char));
@@ -535,14 +589,8 @@ static void pyformat(FILE *file, FILE *out, struct vlbuf *origfile,
535589
*tokd = '\0';
536590
++tokd;
537591
/* convert label to special if it's a word in a list we have */
538-
if (otok == TOK_LABEL) {
539-
/* todo: run constructed state machine to check if word in group. */
540-
for (const char **cc = &specnames[0]; *cc; ++cc) {
541-
if (strcmp(*cc, stokd) == 0) {
542-
otok = TOK_SPECIAL;
543-
break;
544-
}
545-
}
592+
if (otok == TOK_LABEL && is_special_name(stokd)) {
593+
otok = TOK_SPECIAL;
546594
}
547595
if (otok == TOK_OBRACE) {
548596
nestings++;
@@ -708,9 +756,6 @@ static void pyformat(FILE *file, FILE *out, struct vlbuf *origfile,
708756
int eoff = buildpt - laccum.d.ch;
709757

710758
/* the art of line breaking */
711-
712-
/* TODO: create a 'nesting depth' field */
713-
714759
int length_left = 80 - leading_spaces;
715760
formfilelen = vlbuf_append(formfile, lsp.d.ch, formfilelen, out);
716761

@@ -826,8 +871,10 @@ int main(int argc, char **argv) {
826871
fprintf(stderr, "Usage: pfa [files]\n");
827872
fprintf(stderr, " (in place) pfai [files]\n");
828873
}
874+
return 1;
829875
}
830876

877+
make_special_name_table();
831878
struct vlbuf origfile = vlbuf_make(sizeof(char));
832879
struct vlbuf formfile = vlbuf_make(sizeof(char));
833880
int maxnlen = 0;
@@ -892,4 +939,5 @@ int main(int argc, char **argv) {
892939
vlbuf_free(&origfile);
893940
vlbuf_free(&formfile);
894941
free(nbuf);
942+
free_special_name_table();
895943
}

0 commit comments

Comments
 (0)