@@ -186,14 +186,68 @@ static int isoptype(char c) {
186
186
}
187
187
188
188
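/* Python keywords; list generated in Python with:
 *   import keyword; print(*keyword.kwlist)
 * (only the all-lowercase entries are kept below) */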
189
- /* todo: build fast detection state machine, testing inclusion is 30% */
190
189
/* Words that turn a plain label token into a "special" token.
 * The array is NULL-terminated so it can be walked without a length, and
 * every entry is expected to consist of the letters 'a'..'z' only, because
 * the lookup table built below has exactly one column per lowercase letter. */
static const char *specnames[] = {
    "and",    "as",   "assert", "break", "class",  "continue", "def",
    "del",    "elif", "else",   "except", "finally", "for",     "from",
    "global", "if",   "import", "in",    "is",     "lambda",   "nonlocal",
    "not",    "or",   "pass",   "raise", "return", "try",      "while",
    "with",   "yield", NULL};

/* State machine (a trie flattened into arrays) recognising `specnames`.
 *   spectable[26 * state + (c - 'a')] -> next state, or -1 if no word
 *                                        continues with letter c;
 *   terminal[state]                   -> 1 if a word ends in `state`, else 0.
 * State 0 is the root.  Both arrays are heap-allocated by
 * make_special_name_table() and are NULL until it has run. */
static int *spectable = NULL;
static int *terminal = NULL;

/* Build (or rebuild) the keyword state machine from `specnames`.
 *
 * Any previously built tables are released first, so calling this more than
 * once does not leak.  On allocation failure a message is written to stderr
 * and the process exits, instead of dereferencing a NULL table later. */
static void make_special_name_table(void) {
    enum { ALPHABET = 26 };

    /* Upper bound on the number of states: the root plus one state per
     * keyword character (shared prefixes only make the real count smaller).
     * Starting at 1 guarantees the root exists even for an empty list. */
    size_t nstates = 1;
    for (size_t i = 0; specnames[i]; i++) {
        nstates += strlen(specnames[i]);
    }

    free(spectable);
    free(terminal);
    spectable = malloc(sizeof *spectable * ALPHABET * nstates);
    terminal = calloc(nstates, sizeof *terminal); /* all states non-final */
    if (!spectable || !terminal) {
        fprintf(stderr, "out of memory building keyword table\n");
        exit(EXIT_FAILURE);
    }

    /* by default all paths lead to failure */
    for (size_t i = 0; i < ALPHABET * nstates; i++) {
        spectable[i] = -1;
    }

    /* insert each word, reusing existing paths for shared prefixes */
    int next_state = 1;
    for (size_t i = 0; specnames[i]; i++) {
        const char *word = specnames[i];

        /* A word with a character outside 'a'..'z' has no column in the
         * table; indexing with it would write out of bounds, so skip it. */
        size_t len = 0;
        while (word[len] >= 'a' && word[len] <= 'z') {
            len++;
        }
        if (word[len] != '\0') {
            continue;
        }

        int state = 0;
        for (size_t k = 0; k < len; k++) {
            int *slot = &spectable[ALPHABET * state + (word[k] - 'a')];
            if (*slot == -1) {
                *slot = next_state++;
            }
            state = *slot;
        }
        terminal[state] = 1;
    }
}
233
+ static void free_special_name_table () {
234
+ free (spectable );
235
+ free (terminal );
236
+ }
237
+ static int is_special_name (const char * tst ) {
238
+ int fcode = 0 ;
239
+ for (int k = 0 ; tst [k ]; k ++ ) {
240
+ if (tst [k ] < 'a' || tst [k ] > 'z' ) {
241
+ return 0 ;
242
+ }
243
+ fcode = spectable [26 * fcode + (tst [k ] - 'a' )];
244
+ if (fcode == -1 ) {
245
+ return 0 ;
246
+ }
247
+ }
248
+ return terminal [fcode ];
249
+ }
250
+
197
251
static void pyformat (FILE * file , FILE * out , struct vlbuf * origfile ,
198
252
struct vlbuf * formfile ) {
199
253
struct vlbuf linebuf = vlbuf_make (sizeof (char ));
@@ -535,14 +589,8 @@ static void pyformat(FILE *file, FILE *out, struct vlbuf *origfile,
535
589
* tokd = '\0' ;
536
590
++ tokd ;
537
591
/* convert label to special if it's a word in a list we have */
538
- if (otok == TOK_LABEL ) {
539
- /* todo: run constructed state machine to check if word in group. */
540
- for (const char * * cc = & specnames [0 ]; * cc ; ++ cc ) {
541
- if (strcmp (* cc , stokd ) == 0 ) {
542
- otok = TOK_SPECIAL ;
543
- break ;
544
- }
545
- }
592
+ if (otok == TOK_LABEL && is_special_name (stokd )) {
593
+ otok = TOK_SPECIAL ;
546
594
}
547
595
if (otok == TOK_OBRACE ) {
548
596
nestings ++ ;
@@ -708,9 +756,6 @@ static void pyformat(FILE *file, FILE *out, struct vlbuf *origfile,
708
756
int eoff = buildpt - laccum .d .ch ;
709
757
710
758
/* the art of line breaking */
711
-
712
- /* TODO: create a 'nesting depth' field */
713
-
714
759
int length_left = 80 - leading_spaces ;
715
760
formfilelen = vlbuf_append (formfile , lsp .d .ch , formfilelen , out );
716
761
@@ -826,8 +871,10 @@ int main(int argc, char **argv) {
826
871
fprintf (stderr , "Usage: pfa [files]\n" );
827
872
fprintf (stderr , " (in place) pfai [files]\n" );
828
873
}
874
+ return 1 ;
829
875
}
830
876
877
+ make_special_name_table ();
831
878
struct vlbuf origfile = vlbuf_make (sizeof (char ));
832
879
struct vlbuf formfile = vlbuf_make (sizeof (char ));
833
880
int maxnlen = 0 ;
@@ -892,4 +939,5 @@ int main(int argc, char **argv) {
892
939
vlbuf_free (& origfile );
893
940
vlbuf_free (& formfile );
894
941
free (nbuf );
942
+ free_special_name_table ();
895
943
}
0 commit comments