Skip to content

Commit ff72f08

Browse files
asukaminato0721meta-codesync[bot]
authored andcommitted
fix PANIC Sorry, Pyrefly crashed, this is always a bug in Pyrefly itself #2722 (#2755)
Summary: Fixes #2722 Fixed the crash by making override signature diff spans UTF-8 safe. It now computes prefix/suffix matches by character and converts back to byte offsets only at valid char boundaries. Pull Request resolved: #2755 Test Plan: added regression test mypy primer does not panic Reviewed By: grievejia Differential Revision: D95951583 Pulled By: yangdanny97 fbshipit-source-id: b147fc2756fc132ea73dc03efce52bb99d1c9fb7
1 parent 86d8a0a commit ff72f08

2 files changed

Lines changed: 112 additions & 30 deletions

File tree

pyrefly/lib/error/signature_diff.rs

Lines changed: 93 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -56,54 +56,70 @@ fn signature_parts(sig: &str) -> Option<(Range<usize>, Range<usize>)> {
5656
Some((params, ret_start..ret_end))
5757
}
5858

59-
/// Find the byte ranges where two strings differ, using longest common
60-
/// prefix and suffix. Returns `None` if the strings are equal.
59+
/// Find the UTF-8-safe byte ranges where two strings differ, using longest
60+
/// common prefix and suffix. Returns `None` if the strings are equal.
6161
///
6262
/// The returned ranges highlight the "differing middle" of each string.
6363
/// When one string is a strict prefix/suffix of the other, a minimal
64-
/// single-byte range is returned to ensure there's always something to annotate.
65-
///
66-
/// Note: operates on raw bytes, which is correct for ASCII type names but
67-
/// could produce ranges that split multi-byte UTF-8 characters for non-ASCII
68-
/// identifiers.
64+
/// single-character range is returned to ensure there's always something to
65+
/// annotate.
6966
fn diff_ranges(expected: &str, found: &str) -> Option<(Range<usize>, Range<usize>)> {
7067
if expected == found {
7168
return None;
7269
}
73-
let expected_bytes = expected.as_bytes();
74-
let found_bytes = found.as_bytes();
75-
let mut lcp = 0;
76-
while lcp < expected_bytes.len()
77-
&& lcp < found_bytes.len()
78-
&& expected_bytes[lcp] == found_bytes[lcp]
79-
{
80-
lcp += 1;
81-
}
82-
let mut lcs = 0;
83-
while expected_bytes.len() > lcp + lcs
84-
&& found_bytes.len() > lcp + lcs
85-
&& expected_bytes[expected_bytes.len() - 1 - lcs]
86-
== found_bytes[found_bytes.len() - 1 - lcs]
87-
{
88-
lcs += 1;
89-
}
90-
let expected_end = expected_bytes.len().saturating_sub(lcs);
91-
let found_end = found_bytes.len().saturating_sub(lcs);
70+
71+
let expected_char_len = expected.chars().count();
72+
let found_char_len = found.chars().count();
73+
74+
// Count matching characters from the front.
75+
let lcp = expected
76+
.chars()
77+
.zip(found.chars())
78+
.take_while(|(a, b)| a == b)
79+
.count();
80+
81+
// Count matching characters from the end, not overlapping with the prefix.
82+
let max_suffix = std::cmp::min(expected_char_len - lcp, found_char_len - lcp);
83+
let lcs = expected
84+
.chars()
85+
.rev()
86+
.zip(found.chars().rev())
87+
.take(max_suffix)
88+
.take_while(|(a, b)| a == b)
89+
.count();
90+
91+
let expected_end = expected_char_len - lcs;
92+
let found_end = found_char_len - lcs;
93+
94+
// Collect byte offsets for each character index in a single pass per string.
95+
// Index `i` gives the byte offset of the `i`-th character; the final entry
96+
// is the total byte length, so lookups for `n == char_count` work too.
97+
let byte_offsets = |s: &str| -> Vec<usize> {
98+
s.char_indices()
99+
.map(|(i, _)| i)
100+
.chain(std::iter::once(s.len()))
101+
.collect()
102+
};
103+
let expected_offsets = byte_offsets(expected);
104+
let found_offsets = byte_offsets(found);
105+
92106
let expected_span = if expected_end > lcp {
93-
lcp..expected_end
107+
expected_offsets[lcp]..expected_offsets[expected_end]
94108
} else {
95109
// The expected params are a prefix of the found params (or vice versa).
96110
// Point at the first character after the shared prefix, which in the
97111
// full source corresponds to the closing `)` or `,` — indicating where
98112
// parameters are missing or extra. Clamp to the string length to avoid
99113
// producing an out-of-bounds range when the entire string is a prefix
100114
// (e.g., for Callable types whose return type ends at the string boundary).
101-
lcp..(lcp + 1).min(expected_bytes.len())
115+
let next = (lcp + 1).min(expected_char_len);
116+
expected_offsets[lcp]..expected_offsets[next]
102117
};
103118
let found_span = if found_end > lcp {
104-
lcp..found_end
119+
found_offsets[lcp]..found_offsets[found_end]
105120
} else {
106-
lcp..(lcp + 1).min(found_bytes.len())
121+
let next = (lcp + 1).min(found_char_len);
122+
found_offsets[lcp]..found_offsets[next]
107123
};
108124
Some((expected_span, found_span))
109125
}
@@ -475,4 +491,51 @@ class B(A):
475491
"Expected a signature diff for differing return types"
476492
);
477493
}
494+
495+
#[test]
496+
fn test_render_signature_diff_unicode_literal_return_type() {
497+
use super::render_signature_diff;
498+
499+
let expected = "def _money_desc(cls: type[PensionAsset]) -> Literal['累计可领(元)']: ...";
500+
let found = "def _money_desc(cls: type[PensionAsset]) -> Literal['90岁累计可领(元)']: ...";
501+
let result = render_signature_diff(expected, found);
502+
let lines = result.expect("Expected a signature diff for differing Unicode return types");
503+
let joined = lines.join("\n");
504+
assert!(
505+
joined.contains("return type"),
506+
"Expected return type annotation in diff, got:\n{joined}"
507+
);
508+
assert!(
509+
!joined.contains("parameters"),
510+
"Expected no parameter annotation (params are identical), got:\n{joined}"
511+
);
512+
513+
let expected = "def _money_desc(cls: type[PensionAsset]) -> Literal['累计可领']: ...";
514+
let found = "def _money_desc(cls: type[PensionAsset]) -> Literal['累计可累']: ...";
515+
let result = render_signature_diff(expected, found);
516+
let lines = result.expect("Expected a signature diff for differing Unicode return types");
517+
let joined = lines.join("\n");
518+
assert!(
519+
joined.contains("return type"),
520+
"Expected return type annotation in diff, got:\n{joined}"
521+
);
522+
assert!(
523+
!joined.contains("parameters"),
524+
"Expected no parameter annotation (params are identical), got:\n{joined}"
525+
);
526+
527+
let expected = "def _money_desc(cls: type[PensionAsset]) -> Literal['累计可领']: ...";
528+
let found = "def _money_desc(cls: type[PensionAsset]) -> Literal['领计可领']: ...";
529+
let result = render_signature_diff(expected, found);
530+
let lines = result.expect("Expected a signature diff for differing Unicode return types");
531+
let joined = lines.join("\n");
532+
assert!(
533+
joined.contains("return type"),
534+
"Expected return type annotation in diff, got:\n{joined}"
535+
);
536+
assert!(
537+
!joined.contains("parameters"),
538+
"Expected no parameter annotation (params are identical), got:\n{joined}"
539+
);
540+
}
478541
}

pyrefly/lib/test/simple.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,25 @@ assert_type(f(1), int)
429429
"#,
430430
);
431431

432+
// https://github.com/facebook/pyrefly/issues/2722
433+
testcase!(
434+
test_signature_diff_no_panic,
435+
r#"
436+
class TestABC:
437+
pass
438+
439+
class Asset(TestABC):
440+
@classmethod
441+
def _money_desc(cls):
442+
return '累计可领(元)'
443+
444+
class PensionAsset(Asset):
445+
@classmethod
446+
def _money_desc(cls): # E: `PensionAsset._money_desc` has type `(cls: type[PensionAsset]) -> Literal['90岁累计可领(元)']`, which is not assignable to `(cls: type[PensionAsset]) -> Literal['累计可领(元)']`, the type of `Asset._money_desc`
447+
return '90岁累计可领(元)'
448+
"#,
449+
);
450+
432451
testcase!(
433452
test_final_annotated,
434453
r#"

0 commit comments

Comments
 (0)