@@ -430,15 +430,6 @@ typedef enum {
430430 PM_REGEXP_PROPERTY_UNICODE
431431} pm_regexp_property_type_t ;
432432
433- /**
434- * Report whether a property name equals a NUL-terminated target string,
435- * ignoring case. strlen(target) must equal length, so a prefix never matches.
436- */
437- static inline bool
438- pm_regexp_property_name_matches (const uint8_t * name , size_t length , const char * target ) {
439- return strlen (target ) == length && pm_strncasecmp (name , (const uint8_t * ) target , length ) == 0 ;
440- }
441-
442433/**
443434 * Classify a property name. The name may start with '^' for negation, which
444435 * is skipped before matching.
@@ -451,30 +442,63 @@ pm_regexp_classify_property(const uint8_t *name, size_t length) {
451442 length -- ;
452443 }
453444
454- // POSIX properties — valid in all encodings.
455- static const char * const posix_properties [] = {
456- "Alnum" , "Alpha" , "ASCII" , "Blank" , "Cntrl" , "Digit" , "Graph" ,
457- "Lower" , "Print" , "Punct" , "Space" , "Upper" , "XDigit" , "Word" ,
458- NULL
459- };
445+ #define PM_REGEXP_CASECMP (str_ ) (pm_strncasecmp(name, (const uint8_t *) (str_), length) == 0)
460446
461- for (const char * const * property = posix_properties ; * property != NULL ; property ++ ) {
462- if (pm_regexp_property_name_matches (name , length , * property )) {
463- return PM_REGEXP_PROPERTY_POSIX ;
464- }
447+ switch (length ) {
448+ case 3 :
449+ if (PM_REGEXP_CASECMP ("Han" )) return PM_REGEXP_PROPERTY_SCRIPT ;
450+ break ;
451+ case 4 :
452+ if (PM_REGEXP_CASECMP ("Word" )) return PM_REGEXP_PROPERTY_POSIX ;
453+ break ;
454+ case 5 :
455+ /* Most properties are length 5, so dispatch on first character. */
456+ switch (name [0 ] | 0x20 ) {
457+ case 'a' :
458+ if (PM_REGEXP_CASECMP ("Alnum" )) return PM_REGEXP_PROPERTY_POSIX ;
459+ if (PM_REGEXP_CASECMP ("Alpha" )) return PM_REGEXP_PROPERTY_POSIX ;
460+ if (PM_REGEXP_CASECMP ("ASCII" )) return PM_REGEXP_PROPERTY_POSIX ;
461+ break ;
462+ case 'b' :
463+ if (PM_REGEXP_CASECMP ("Blank" )) return PM_REGEXP_PROPERTY_POSIX ;
464+ break ;
465+ case 'c' :
466+ if (PM_REGEXP_CASECMP ("Cntrl" )) return PM_REGEXP_PROPERTY_POSIX ;
467+ break ;
468+ case 'd' :
469+ if (PM_REGEXP_CASECMP ("Digit" )) return PM_REGEXP_PROPERTY_POSIX ;
470+ break ;
471+ case 'g' :
472+ if (PM_REGEXP_CASECMP ("Graph" )) return PM_REGEXP_PROPERTY_POSIX ;
473+ if (PM_REGEXP_CASECMP ("Greek" )) return PM_REGEXP_PROPERTY_SCRIPT ;
474+ break ;
475+ case 'l' :
476+ if (PM_REGEXP_CASECMP ("Lower" )) return PM_REGEXP_PROPERTY_POSIX ;
477+ if (PM_REGEXP_CASECMP ("Latin" )) return PM_REGEXP_PROPERTY_SCRIPT ;
478+ break ;
479+ case 'p' :
480+ if (PM_REGEXP_CASECMP ("Print" )) return PM_REGEXP_PROPERTY_POSIX ;
481+ if (PM_REGEXP_CASECMP ("Punct" )) return PM_REGEXP_PROPERTY_POSIX ;
482+ break ;
483+ case 's' :
484+ if (PM_REGEXP_CASECMP ("Space" )) return PM_REGEXP_PROPERTY_POSIX ;
485+ break ;
486+ case 'u' :
487+ if (PM_REGEXP_CASECMP ("Upper" )) return PM_REGEXP_PROPERTY_POSIX ;
488+ break ;
489+ }
490+ break ;
491+ case 6 :
492+ if (PM_REGEXP_CASECMP ("XDigit" )) return PM_REGEXP_PROPERTY_POSIX ;
493+ break ;
494+ case 8 :
495+ if (PM_REGEXP_CASECMP ("Hiragana" )) return PM_REGEXP_PROPERTY_SCRIPT ;
496+ if (PM_REGEXP_CASECMP ("Katakana" )) return PM_REGEXP_PROPERTY_SCRIPT ;
497+ if (PM_REGEXP_CASECMP ("Cyrillic" )) return PM_REGEXP_PROPERTY_SCRIPT ;
498+ break ;
465499 }
466500
467- // Script properties — valid in /e, /s, /u but not /n.
468- static const char * const script_properties [] = {
469- "Hiragana" , "Katakana" , "Han" , "Latin" , "Greek" , "Cyrillic" ,
470- NULL
471- };
472-
473- for (const char * const * property = script_properties ; * property != NULL ; property ++ ) {
474- if (pm_regexp_property_name_matches (name , length , * property )) {
475- return PM_REGEXP_PROPERTY_SCRIPT ;
476- }
477- }
501+ #undef PM_REGEXP_CASECMP
478502
479503 // Everything else is Unicode-only (general categories, other scripts, etc.).
480504 return PM_REGEXP_PROPERTY_UNICODE ;
0 commit comments