@@ -249,11 +249,9 @@ typedef struct TParser
249
249
/* string and position information */
250
250
char * str ; /* multibyte string */
251
251
int lenstr ; /* length of mbstring */
252
- #ifdef USE_WIDE_UPPER_LOWER
253
252
wchar_t * wstr ; /* wide character string */
254
253
pg_wchar * pgwstr ; /* wide character string for C-locale */
255
254
bool usewide ;
256
- #endif
257
255
258
256
/* State of parse */
259
257
int charmaxlen ;
@@ -302,8 +300,6 @@ TParserInit(char *str, int len)
302
300
prs -> str = str ;
303
301
prs -> lenstr = len ;
304
302
305
- #ifdef USE_WIDE_UPPER_LOWER
306
-
307
303
/*
308
304
* Use wide char code only when max encoding length > 1.
309
305
*/
@@ -331,7 +327,6 @@ TParserInit(char *str, int len)
331
327
}
332
328
else
333
329
prs -> usewide = false;
334
- #endif
335
330
336
331
prs -> state = newTParserPosition (NULL );
337
332
prs -> state -> state = TPS_Base ;
@@ -368,15 +363,12 @@ TParserCopyInit(const TParser *orig)
368
363
prs -> charmaxlen = orig -> charmaxlen ;
369
364
prs -> str = orig -> str + orig -> state -> posbyte ;
370
365
prs -> lenstr = orig -> lenstr - orig -> state -> posbyte ;
371
-
372
- #ifdef USE_WIDE_UPPER_LOWER
373
366
prs -> usewide = orig -> usewide ;
374
367
375
368
if (orig -> pgwstr )
376
369
prs -> pgwstr = orig -> pgwstr + orig -> state -> poschar ;
377
370
if (orig -> wstr )
378
371
prs -> wstr = orig -> wstr + orig -> state -> poschar ;
379
- #endif
380
372
381
373
prs -> state = newTParserPosition (NULL );
382
374
prs -> state -> state = TPS_Base ;
@@ -401,12 +393,10 @@ TParserClose(TParser *prs)
401
393
prs -> state = ptr ;
402
394
}
403
395
404
- #ifdef USE_WIDE_UPPER_LOWER
405
396
if (prs -> wstr )
406
397
pfree (prs -> wstr );
407
398
if (prs -> pgwstr )
408
399
pfree (prs -> pgwstr );
409
- #endif
410
400
411
401
#ifdef WPARSER_TRACE
412
402
fprintf (stderr , "closing parser\n" );
@@ -445,96 +435,45 @@ TParserCopyClose(TParser *prs)
445
435
* - if locale is C then we use pgwstr instead of wstr.
446
436
*/
447
437
448
- #ifdef USE_WIDE_UPPER_LOWER
449
-
450
- #define p_iswhat (type ) \
438
+ #define p_iswhat (type , nonascii ) \
439
+ \
451
440
static int \
452
- p_is##type(TParser *prs) { \
453
- Assert( prs->state ); \
454
- if ( prs->usewide ) \
441
+ p_is##type(TParser *prs) \
442
+ { \
443
+ Assert(prs->state); \
444
+ if (prs->usewide) \
455
445
{ \
456
- if ( prs->pgwstr ) \
446
+ if (prs->pgwstr) \
457
447
{ \
458
448
unsigned int c = *(prs->pgwstr + prs->state->poschar); \
459
- if ( c > 0x7f ) \
460
- return 0; \
461
- return is##type( c ); \
449
+ if (c > 0x7f) \
450
+ return nonascii; \
451
+ return is##type(c); \
462
452
} \
463
- return isw##type( *( prs->wstr + prs->state->poschar ) ); \
453
+ return isw##type(*( prs->wstr + prs->state->poschar)); \
464
454
} \
465
- \
466
- return is##type( *(unsigned char*)( prs->str + prs->state->posbyte ) ); \
467
- } \
455
+ return is##type(*(unsigned char *) (prs->str + prs->state->posbyte)); \
456
+ } \
468
457
\
469
458
static int \
470
- p_isnot##type(TParser *prs) { \
459
+ p_isnot##type(TParser *prs) \
460
+ { \
471
461
return !p_is##type(prs); \
472
462
}
473
463
474
- static int
475
- p_isalnum (TParser * prs )
476
- {
477
- Assert (prs -> state );
478
-
479
- if (prs -> usewide )
480
- {
481
- if (prs -> pgwstr )
482
- {
483
- unsigned int c = * (prs -> pgwstr + prs -> state -> poschar );
484
-
485
- /*
486
- * any non-ascii symbol with multibyte encoding with C-locale is
487
- * an alpha character
488
- */
489
- if (c > 0x7f )
490
- return 1 ;
491
-
492
- return isalnum (c );
493
- }
494
-
495
- return iswalnum (* (prs -> wstr + prs -> state -> poschar ));
496
- }
497
-
498
- return isalnum (* (unsigned char * ) (prs -> str + prs -> state -> posbyte ));
499
- }
500
- static int
501
- p_isnotalnum (TParser * prs )
502
- {
503
- return !p_isalnum (prs );
504
- }
505
-
506
- static int
507
- p_isalpha (TParser * prs )
508
- {
509
- Assert (prs -> state );
510
-
511
- if (prs -> usewide )
512
- {
513
- if (prs -> pgwstr )
514
- {
515
- unsigned int c = * (prs -> pgwstr + prs -> state -> poschar );
516
-
517
- /*
518
- * any non-ascii symbol with multibyte encoding with C-locale is
519
- * an alpha character
520
- */
521
- if (c > 0x7f )
522
- return 1 ;
523
-
524
- return isalpha (c );
525
- }
526
-
527
- return iswalpha (* (prs -> wstr + prs -> state -> poschar ));
528
- }
529
-
530
- return isalpha (* (unsigned char * ) (prs -> str + prs -> state -> posbyte ));
531
- }
532
-
533
- static int
534
- p_isnotalpha (TParser * prs )
535
- {
536
- return !p_isalpha (prs );
537
- }
464
+ /*
465
+ * In C locale with a multibyte encoding, any non-ASCII symbol is considered
466
+ * an alpha (and hence alnum) character, but not a member of any other class.
467
+ */
468
+ p_iswhat (alnum , 1 )
469
+ p_iswhat (alpha , 1 )
470
+ p_iswhat (digit , 0 )
471
+ p_iswhat (lower , 0 )
472
+ p_iswhat (print , 0 )
473
+ p_iswhat (punct , 0 )
474
+ p_iswhat (space , 0 )
475
+ p_iswhat (upper , 0 )
476
+ p_iswhat (xdigit , 0 )
538
477
539
478
/* p_iseq should be used only for ASCII symbols: it matches single-byte characters only */
540
479
@@ -544,39 +483,6 @@ p_iseq(TParser *prs, char c)
544
483
Assert (prs -> state );
545
484
return ((prs -> state -> charlen == 1 && * (prs -> str + prs -> state -> posbyte ) == c )) ? 1 : 0 ;
546
485
}
547
- #else /* USE_WIDE_UPPER_LOWER */
548
-
549
- #define p_iswhat (type ) \
550
- static int \
551
- p_is##type(TParser *prs) { \
552
- Assert( prs->state ); \
553
- return is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ); \
554
- } \
555
- \
556
- static int \
557
- p_isnot##type(TParser *prs) { \
558
- return !p_is##type(prs); \
559
- }
560
-
561
-
562
- static int
563
- p_iseq (TParser * prs , char c )
564
- {
565
- Assert (prs -> state );
566
- return (* (prs -> str + prs -> state -> posbyte ) == c ) ? 1 : 0 ;
567
- }
568
-
569
- p_iswhat (alnum )
570
- p_iswhat (alpha )
571
- #endif /* USE_WIDE_UPPER_LOWER */
572
-
573
- p_iswhat (digit )
574
- p_iswhat (lower )
575
- p_iswhat (print )
576
- p_iswhat (punct )
577
- p_iswhat (space )
578
- p_iswhat (upper )
579
- p_iswhat (xdigit )
580
486
581
487
static int
582
488
p_isEOF (TParser * prs )
@@ -793,8 +699,6 @@ p_isspecial(TParser *prs)
793
699
if (pg_dsplen (prs -> str + prs -> state -> posbyte ) == 0 )
794
700
return 1 ;
795
701
796
- #ifdef USE_WIDE_UPPER_LOWER
797
-
798
702
/*
799
703
* Unicode characters in the 'Mark, Spacing Combining' category: these
800
704
* characters are not alphabetic, but they do not break words either.
@@ -1058,7 +962,6 @@ p_isspecial(TParser *prs)
1058
962
StopHigh = StopMiddle ;
1059
963
}
1060
964
}
1061
- #endif
1062
965
1063
966
return 0 ;
1064
967
}
0 commit comments