@@ -485,6 +485,64 @@ get_decomp_record(PyObject *self, Py_UCS4 code,
485485 (* index )++ ;
486486}
487487
488+ /* Small combining runs are usually cheaper with insertion sort. */
489+ #define CANONICAL_ORDERING_COUNTING_SORT_THRESHOLD 20
490+
491+ static void
492+ canonical_ordering_sort_insertion (int kind , void * data ,
493+ Py_ssize_t start , Py_ssize_t end )
494+ {
495+ for (Py_ssize_t i = start + 1 ; i < end ; i ++ ) {
496+ Py_UCS4 code = PyUnicode_READ (kind , data , i );
497+ unsigned char combining = _getrecord_ex (code )-> combining ;
498+ Py_ssize_t j = i ;
499+
500+ while (j > start ) {
501+ Py_UCS4 previous = PyUnicode_READ (kind , data , j - 1 );
502+ if (_getrecord_ex (previous )-> combining <= combining ) {
503+ break ;
504+ }
505+ PyUnicode_WRITE (kind , data , j , previous );
506+ j -- ;
507+ }
508+ if (j != i ) {
509+ PyUnicode_WRITE (kind , data , j , code );
510+ }
511+ }
512+ }
513+
514+ static void
515+ canonical_ordering_sort_counting (int kind , void * data ,
516+ Py_ssize_t start , Py_ssize_t end ,
517+ Py_UCS4 * sortbuf )
518+ {
519+ Py_ssize_t counts [256 ] = {0 };
520+ Py_ssize_t run_length = end - start ;
521+ Py_ssize_t total = 0 ;
522+
523+ for (Py_ssize_t i = start ; i < end ; i ++ ) {
524+ Py_UCS4 code = PyUnicode_READ (kind , data , i );
525+ unsigned char combining = _getrecord_ex (code )-> combining ;
526+ counts [combining ]++ ;
527+ }
528+
529+ for (size_t i = 0 ; i < Py_ARRAY_LENGTH (counts ); i ++ ) {
530+ Py_ssize_t count = counts [i ];
531+ counts [i ] = total ;
532+ total += count ;
533+ }
534+
535+ /* Reuse counts[] as the next output slot for each CCC. */
536+ for (Py_ssize_t i = start ; i < end ; i ++ ) {
537+ Py_UCS4 code = PyUnicode_READ (kind , data , i );
538+ unsigned char combining = _getrecord_ex (code )-> combining ;
539+ sortbuf [counts [combining ]++ ] = code ;
540+ }
541+ for (Py_ssize_t i = 0 ; i < run_length ; i ++ ) {
542+ PyUnicode_WRITE (kind , data , start + i , sortbuf [i ]);
543+ }
544+ }
545+
488546#define SBase 0xAC00
489547#define LBase 0x1100
490548#define VBase 0x1161
@@ -501,13 +559,16 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
501559 PyObject * result ;
502560 Py_UCS4 * output ;
503561 Py_ssize_t i , o , osize ;
504- int kind ;
505- const void * data ;
562+ int input_kind , result_kind ;
563+ const void * input_data ;
564+ void * result_data ;
506565 /* Longest decomposition in Unicode 3.2: U+FDFA */
507566 Py_UCS4 stack [20 ];
508567 Py_ssize_t space , isize ;
509568 int index , prefix , count , stackptr ;
510569 unsigned char prev , cur ;
570+ Py_UCS4 * sortbuf = NULL ;
571+ Py_ssize_t sortbuflen = 0 ;
511572
512573 stackptr = 0 ;
513574 isize = PyUnicode_GET_LENGTH (input );
@@ -527,11 +588,11 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
527588 return NULL ;
528589 }
529590 i = o = 0 ;
530- kind = PyUnicode_KIND (input );
531- data = PyUnicode_DATA (input );
591+ input_kind = PyUnicode_KIND (input );
592+ input_data = PyUnicode_DATA (input );
532593
533594 while (i < isize ) {
534- stack [stackptr ++ ] = PyUnicode_READ (kind , data , i ++ );
595+ stack [stackptr ++ ] = PyUnicode_READ (input_kind , input_data , i ++ );
535596 while (stackptr ) {
536597 Py_UCS4 code = stack [-- stackptr ];
537598 /* Hangul Decomposition adds three characters in
@@ -597,34 +658,64 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
597658 if (!result )
598659 return NULL ;
599660 /* result is guaranteed to be ready, as it is compact. */
600- kind = PyUnicode_KIND (result );
601- data = PyUnicode_DATA (result );
661+ result_kind = PyUnicode_KIND (result );
662+ result_data = PyUnicode_DATA (result );
602663
603- /* Sort canonically. */
664+ /* Sort each consecutive combining-character run canonically. */
604665 i = 0 ;
605- prev = _getrecord_ex (PyUnicode_READ (kind , data , i ))-> combining ;
606- for (i ++ ; i < PyUnicode_GET_LENGTH (result ); i ++ ) {
607- cur = _getrecord_ex (PyUnicode_READ (kind , data , i ))-> combining ;
608- if (prev == 0 || cur == 0 || prev <= cur ) {
609- prev = cur ;
666+ while (i < o ) {
667+ Py_ssize_t run_length , run_start ;
668+ int needs_sort = 0 ;
669+
670+ Py_UCS4 ch = PyUnicode_READ (result_kind , result_data , i );
671+ prev = _getrecord_ex (ch )-> combining ;
672+ if (prev == 0 ) {
673+ i ++ ;
610674 continue ;
611675 }
612- /* Non-canonical order. Need to switch *i with previous. */
613- o = i - 1 ;
614- while (1 ) {
615- Py_UCS4 tmp = PyUnicode_READ (kind , data , o + 1 );
616- PyUnicode_WRITE (kind , data , o + 1 ,
617- PyUnicode_READ (kind , data , o ));
618- PyUnicode_WRITE (kind , data , o , tmp );
619- o -- ;
620- if (o < 0 )
621- break ;
622- prev = _getrecord_ex (PyUnicode_READ (kind , data , o ))-> combining ;
623- if (prev == 0 || prev <= cur )
676+
677+ run_start = i ++ ;
678+ while (i < o ) {
679+ Py_UCS4 ch = PyUnicode_READ (result_kind , result_data , i );
680+ cur = _getrecord_ex (ch )-> combining ;
681+ if (cur == 0 ) {
624682 break ;
683+ }
684+ if (prev > cur ) {
685+ needs_sort = 1 ;
686+ }
687+ prev = cur ;
688+ i ++ ;
689+ }
690+ if (!needs_sort ) {
691+ continue ;
692+ }
693+
694+ run_length = i - run_start ;
695+ if (run_length < CANONICAL_ORDERING_COUNTING_SORT_THRESHOLD ) {
696+ canonical_ordering_sort_insertion (result_kind , result_data ,
697+ run_start , i );
698+ continue ;
625699 }
626- prev = _getrecord_ex (PyUnicode_READ (kind , data , i ))-> combining ;
700+
701+ if (run_length > sortbuflen ) {
702+ Py_UCS4 * new_sortbuf = PyMem_Resize (sortbuf ,
703+ Py_UCS4 ,
704+ run_length );
705+ if (new_sortbuf == NULL ) {
706+ PyErr_NoMemory ();
707+ PyMem_Free (sortbuf );
708+ Py_DECREF (result );
709+ return NULL ;
710+ }
711+ sortbuf = new_sortbuf ;
712+ sortbuflen = run_length ;
713+ }
714+
715+ canonical_ordering_sort_counting (result_kind , result_data ,
716+ run_start , i , sortbuf );
627717 }
718+ PyMem_Free (sortbuf );
628719 return result ;
629720}
630721
0 commit comments