@@ -56,54 +56,70 @@ fn signature_parts(sig: &str) -> Option<(Range<usize>, Range<usize>)> {
5656 Some ( ( params, ret_start..ret_end) )
5757}
5858
59- /// Find the byte ranges where two strings differ, using longest common
60- /// prefix and suffix. Returns `None` if the strings are equal.
59+ /// Find the UTF-8-safe byte ranges where two strings differ, using longest
60+ /// common prefix and suffix. Returns `None` if the strings are equal.
6161///
6262/// The returned ranges highlight the "differing middle" of each string.
6363/// When one string is a strict prefix/suffix of the other, a minimal
64- /// single-byte range is returned to ensure there's always something to annotate.
65- ///
66- /// Note: operates on raw bytes, which is correct for ASCII type names but
67- /// could produce ranges that split multi-byte UTF-8 characters for non-ASCII
68- /// identifiers.
64+ /// single-character range is returned to ensure there's always something to
65+ /// annotate.
6966fn diff_ranges ( expected : & str , found : & str ) -> Option < ( Range < usize > , Range < usize > ) > {
7067 if expected == found {
7168 return None ;
7269 }
73- let expected_bytes = expected. as_bytes ( ) ;
74- let found_bytes = found. as_bytes ( ) ;
75- let mut lcp = 0 ;
76- while lcp < expected_bytes. len ( )
77- && lcp < found_bytes. len ( )
78- && expected_bytes[ lcp] == found_bytes[ lcp]
79- {
80- lcp += 1 ;
81- }
82- let mut lcs = 0 ;
83- while expected_bytes. len ( ) > lcp + lcs
84- && found_bytes. len ( ) > lcp + lcs
85- && expected_bytes[ expected_bytes. len ( ) - 1 - lcs]
86- == found_bytes[ found_bytes. len ( ) - 1 - lcs]
87- {
88- lcs += 1 ;
89- }
90- let expected_end = expected_bytes. len ( ) . saturating_sub ( lcs) ;
91- let found_end = found_bytes. len ( ) . saturating_sub ( lcs) ;
70+
71+ let expected_char_len = expected. chars ( ) . count ( ) ;
72+ let found_char_len = found. chars ( ) . count ( ) ;
73+
74+ // Count matching characters from the front.
75+ let lcp = expected
76+ . chars ( )
77+ . zip ( found. chars ( ) )
78+ . take_while ( |( a, b) | a == b)
79+ . count ( ) ;
80+
81+ // Count matching characters from the end, not overlapping with the prefix.
82+ let max_suffix = std:: cmp:: min ( expected_char_len - lcp, found_char_len - lcp) ;
83+ let lcs = expected
84+ . chars ( )
85+ . rev ( )
86+ . zip ( found. chars ( ) . rev ( ) )
87+ . take ( max_suffix)
88+ . take_while ( |( a, b) | a == b)
89+ . count ( ) ;
90+
91+ let expected_end = expected_char_len - lcs;
92+ let found_end = found_char_len - lcs;
93+
94+ // Collect byte offsets for each character index in a single pass per string.
95+ // Index `i` gives the byte offset of the `i`-th character; the final entry
96+ // is the total byte length, so lookups for `n == char_count` work too.
97+ let byte_offsets = |s : & str | -> Vec < usize > {
98+ s. char_indices ( )
99+ . map ( |( i, _) | i)
100+ . chain ( std:: iter:: once ( s. len ( ) ) )
101+ . collect ( )
102+ } ;
103+ let expected_offsets = byte_offsets ( expected) ;
104+ let found_offsets = byte_offsets ( found) ;
105+
92106 let expected_span = if expected_end > lcp {
93- lcp.. expected_end
107+ expected_offsets [ lcp] ..expected_offsets [ expected_end]
94108 } else {
95109 // The expected params are a prefix of the found params (or vice versa).
96110 // Point at the first character after the shared prefix, which in the
97111 // full source corresponds to the closing `)` or `,` — indicating where
98112 // parameters are missing or extra. Clamp to the string length to avoid
99113 // producing an out-of-bounds range when the entire string is a prefix
100114 // (e.g., for Callable types whose return type ends at the string boundary).
101- lcp..( lcp + 1 ) . min ( expected_bytes. len ( ) )
115+ let next = ( lcp + 1 ) . min ( expected_char_len) ;
116+ expected_offsets[ lcp] ..expected_offsets[ next]
102117 } ;
103118 let found_span = if found_end > lcp {
104- lcp.. found_end
119+ found_offsets [ lcp] ..found_offsets [ found_end]
105120 } else {
106- lcp..( lcp + 1 ) . min ( found_bytes. len ( ) )
121+ let next = ( lcp + 1 ) . min ( found_char_len) ;
122+ found_offsets[ lcp] ..found_offsets[ next]
107123 } ;
108124 Some ( ( expected_span, found_span) )
109125}
@@ -475,4 +491,51 @@ class B(A):
475491 "Expected a signature diff for differing return types"
476492 ) ;
477493 }
494+
495+ #[ test]
496+ fn test_render_signature_diff_unicode_literal_return_type ( ) {
497+ use super :: render_signature_diff;
498+
499+ let expected = "def _money_desc(cls: type[PensionAsset]) -> Literal['累计可领(元)']: ..." ;
500+ let found = "def _money_desc(cls: type[PensionAsset]) -> Literal['90岁累计可领(元)']: ..." ;
501+ let result = render_signature_diff ( expected, found) ;
502+ let lines = result. expect ( "Expected a signature diff for differing Unicode return types" ) ;
503+ let joined = lines. join ( "\n " ) ;
504+ assert ! (
505+ joined. contains( "return type" ) ,
506+ "Expected return type annotation in diff, got:\n {joined}"
507+ ) ;
508+ assert ! (
509+ !joined. contains( "parameters" ) ,
510+ "Expected no parameter annotation (params are identical), got:\n {joined}"
511+ ) ;
512+
513+ let expected = "def _money_desc(cls: type[PensionAsset]) -> Literal['累计可领']: ..." ;
514+ let found = "def _money_desc(cls: type[PensionAsset]) -> Literal['累计可累']: ..." ;
515+ let result = render_signature_diff ( expected, found) ;
516+ let lines = result. expect ( "Expected a signature diff for differing Unicode return types" ) ;
517+ let joined = lines. join ( "\n " ) ;
518+ assert ! (
519+ joined. contains( "return type" ) ,
520+ "Expected return type annotation in diff, got:\n {joined}"
521+ ) ;
522+ assert ! (
523+ !joined. contains( "parameters" ) ,
524+ "Expected no parameter annotation (params are identical), got:\n {joined}"
525+ ) ;
526+
527+ let expected = "def _money_desc(cls: type[PensionAsset]) -> Literal['累计可领']: ..." ;
528+ let found = "def _money_desc(cls: type[PensionAsset]) -> Literal['领计可领']: ..." ;
529+ let result = render_signature_diff ( expected, found) ;
530+ let lines = result. expect ( "Expected a signature diff for differing Unicode return types" ) ;
531+ let joined = lines. join ( "\n " ) ;
532+ assert ! (
533+ joined. contains( "return type" ) ,
534+ "Expected return type annotation in diff, got:\n {joined}"
535+ ) ;
536+ assert ! (
537+ !joined. contains( "parameters" ) ,
538+ "Expected no parameter annotation (params are identical), got:\n {joined}"
539+ ) ;
540+ }
478541}
0 commit comments