Skip to content

Commit 6d625cc

Browse files
Rollup merge of #145024 - Kmeakin:km/optimize-slice-index/v3, r=Mark-Simulacrum
Optimize indexing slices and strs with inclusive ranges Instead of separately checking for `end == usize::MAX` and `end + 1 > slice.len()`, we can check for `end >= slice.len()`. Also consolidate all the str indexing related panic functions into a single function which reports the correct error depending on the arguments, as the slice indexing code already does. The downside of all this is that the panic message is slightly less specific when trying to index with `[..=usize::MAX]`: instead of saying "attempted to index str up to maximum usize" it just says "end byte index {end} out of bounds". But this is a rare enough case that I think it is acceptable
2 parents 0318407 + f8cf68f commit 6d625cc

8 files changed

Lines changed: 302 additions & 98 deletions

File tree

library/alloctests/tests/str.rs

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -612,14 +612,14 @@ mod slice_index {
612612
data: "abcdef";
613613
good: data[4..4] == "";
614614
bad: data[4..3];
615-
message: "begin <= end (4 <= 3)";
615+
message: "begin > end (4 > 3)";
616616
}
617617

618618
in mod rangeinclusive_neg_width {
619619
data: "abcdef";
620620
good: data[4..=3] == "";
621621
bad: data[4..=2];
622-
message: "begin <= end (4 <= 3)";
622+
message: "begin > end (4 > 3)";
623623
}
624624
}
625625

@@ -630,13 +630,13 @@ mod slice_index {
630630
// note: using 0 specifically ensures that the result of overflowing is 0..0,
631631
// so that `get` doesn't simply return None for the wrong reason.
632632
bad: data[0..=usize::MAX];
633-
message: "maximum usize";
633+
message: "out of bounds";
634634
}
635635

636636
in mod rangetoinclusive {
637637
data: "hello";
638638
bad: data[..=usize::MAX];
639-
message: "maximum usize";
639+
message: "out of bounds";
640640
}
641641
}
642642
}
@@ -659,49 +659,49 @@ mod slice_index {
659659
data: super::DATA;
660660
bad: data[super::BAD_START..super::GOOD_END];
661661
message:
662-
"byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
662+
"start byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
663663
}
664664

665665
in mod range_2 {
666666
data: super::DATA;
667667
bad: data[super::GOOD_START..super::BAD_END];
668668
message:
669-
"byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
669+
"end byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
670670
}
671671

672672
in mod rangefrom {
673673
data: super::DATA;
674674
bad: data[super::BAD_START..];
675675
message:
676-
"byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
676+
"start byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
677677
}
678678

679679
in mod rangeto {
680680
data: super::DATA;
681681
bad: data[..super::BAD_END];
682682
message:
683-
"byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
683+
"end byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
684684
}
685685

686686
in mod rangeinclusive_1 {
687687
data: super::DATA;
688688
bad: data[super::BAD_START..=super::GOOD_END_INCL];
689689
message:
690-
"byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
690+
"start byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
691691
}
692692

693693
in mod rangeinclusive_2 {
694694
data: super::DATA;
695695
bad: data[super::GOOD_START..=super::BAD_END_INCL];
696696
message:
697-
"byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
697+
"end byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
698698
}
699699

700700
in mod rangetoinclusive {
701701
data: super::DATA;
702702
bad: data[..=super::BAD_END_INCL];
703703
message:
704-
"byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
704+
"end byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
705705
}
706706
}
707707
}
@@ -716,7 +716,9 @@ mod slice_index {
716716

717717
// check the panic includes the prefix of the sliced string
718718
#[test]
719-
#[should_panic(expected = "byte index 1024 is out of bounds of `Lorem ipsum dolor sit amet")]
719+
#[should_panic(
720+
expected = "end byte index 1024 is out of bounds of `Lorem ipsum dolor sit amet"
721+
)]
720722
fn test_slice_fail_truncated_1() {
721723
let _ = &LOREM_PARAGRAPH[..1024];
722724
}

library/core/src/range.rs

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -352,15 +352,6 @@ impl<Idx: Step> RangeInclusive<Idx> {
352352
}
353353
}
354354

355-
impl RangeInclusive<usize> {
356-
/// Converts to an exclusive `Range` for `SliceIndex` implementations.
357-
/// The caller is responsible for dealing with `last == usize::MAX`.
358-
#[inline]
359-
pub(crate) const fn into_slice_range(self) -> Range<usize> {
360-
Range { start: self.start, end: self.last + 1 }
361-
}
362-
}
363-
364355
#[stable(feature = "new_range_inclusive_api", since = "CURRENT_RUSTC_VERSION")]
365356
#[rustc_const_unstable(feature = "const_range", issue = "none")]
366357
impl<T> const RangeBounds<T> for RangeInclusive<T> {

library/core/src/slice/index.rs

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -663,7 +663,6 @@ unsafe impl<T> const SliceIndex<[T]> for ops::RangeFull {
663663
}
664664

665665
/// The methods `index` and `index_mut` panic if:
666-
/// - the end of the range is `usize::MAX` or
667666
/// - the start of the range is greater than the end of the range or
668667
/// - the end of the range is out of bounds.
669668
#[stable(feature = "inclusive_range", since = "1.26.0")]
@@ -673,12 +672,12 @@ unsafe impl<T> const SliceIndex<[T]> for ops::RangeInclusive<usize> {
673672

674673
#[inline]
675674
fn get(self, slice: &[T]) -> Option<&[T]> {
676-
if *self.end() == usize::MAX { None } else { self.into_slice_range().get(slice) }
675+
if *self.end() >= slice.len() { None } else { self.into_slice_range().get(slice) }
677676
}
678677

679678
#[inline]
680679
fn get_mut(self, slice: &mut [T]) -> Option<&mut [T]> {
681-
if *self.end() == usize::MAX { None } else { self.into_slice_range().get_mut(slice) }
680+
if *self.end() >= slice.len() { None } else { self.into_slice_range().get_mut(slice) }
682681
}
683682

684683
#[inline]
@@ -950,8 +949,7 @@ where
950949
R: ops::RangeBounds<usize>,
951950
{
952951
let len = bounds.end;
953-
let r = into_range(len, (range.start_bound().copied(), range.end_bound().copied()))?;
954-
if r.start > r.end || r.end > len { None } else { Some(r) }
952+
try_into_slice_range(len, (range.start_bound().copied(), range.end_bound().copied()))
955953
}
956954

957955
/// Converts a pair of `ops::Bound`s into `ops::Range` without performing any
@@ -978,25 +976,31 @@ pub(crate) const fn into_range_unchecked(
978976
/// Returns `None` on overflowing indices.
979977
#[rustc_const_unstable(feature = "const_range", issue = "none")]
980978
#[inline]
981-
pub(crate) const fn into_range(
979+
pub(crate) const fn try_into_slice_range(
982980
len: usize,
983981
(start, end): (ops::Bound<usize>, ops::Bound<usize>),
984982
) -> Option<ops::Range<usize>> {
985-
use ops::Bound;
986-
let start = match start {
987-
Bound::Included(start) => start,
988-
Bound::Excluded(start) => start.checked_add(1)?,
989-
Bound::Unbounded => 0,
990-
};
991-
992983
let end = match end {
993-
Bound::Included(end) => end.checked_add(1)?,
994-
Bound::Excluded(end) => end,
995-
Bound::Unbounded => len,
984+
ops::Bound::Included(end) if end >= len => return None,
985+
// Cannot overflow because `end < len` implies `end < usize::MAX`.
986+
ops::Bound::Included(end) => end + 1,
987+
988+
ops::Bound::Excluded(end) if end > len => return None,
989+
ops::Bound::Excluded(end) => end,
990+
991+
ops::Bound::Unbounded => len,
996992
};
997993

998-
// Don't bother with checking `start < end` and `end <= len`
999-
// since these checks are handled by `Range` impls
994+
let start = match start {
995+
ops::Bound::Excluded(start) if start >= end => return None,
996+
// Cannot overflow because `start < end` implies `start < usize::MAX`.
997+
ops::Bound::Excluded(start) => start + 1,
998+
999+
ops::Bound::Included(start) if start > end => return None,
1000+
ops::Bound::Included(start) => start,
1001+
1002+
ops::Bound::Unbounded => 0,
1003+
};
10001004

10011005
Some(start..end)
10021006
}
@@ -1039,12 +1043,12 @@ unsafe impl<T> SliceIndex<[T]> for (ops::Bound<usize>, ops::Bound<usize>) {
10391043

10401044
#[inline]
10411045
fn get(self, slice: &[T]) -> Option<&Self::Output> {
1042-
into_range(slice.len(), self)?.get(slice)
1046+
try_into_slice_range(slice.len(), self)?.get(slice)
10431047
}
10441048

10451049
#[inline]
10461050
fn get_mut(self, slice: &mut [T]) -> Option<&mut Self::Output> {
1047-
into_range(slice.len(), self)?.get_mut(slice)
1051+
try_into_slice_range(slice.len(), self)?.get_mut(slice)
10481052
}
10491053

10501054
#[inline]

library/core/src/str/mod.rs

Lines changed: 43 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -85,34 +85,50 @@ fn slice_error_fail_rt(s: &str, begin: usize, end: usize) -> ! {
8585
let trunc_len = s.floor_char_boundary(MAX_DISPLAY_LENGTH);
8686
let s_trunc = &s[..trunc_len];
8787
let ellipsis = if trunc_len < s.len() { "[...]" } else { "" };
88+
let len = s.len();
8889

89-
// 1. out of bounds
90-
if begin > s.len() || end > s.len() {
91-
let oob_index = if begin > s.len() { begin } else { end };
92-
panic!("byte index {oob_index} is out of bounds of `{s_trunc}`{ellipsis}");
93-
}
94-
95-
// 2. begin <= end
96-
assert!(
97-
begin <= end,
98-
"begin <= end ({} <= {}) when slicing `{}`{}",
99-
begin,
100-
end,
101-
s_trunc,
102-
ellipsis
103-
);
104-
105-
// 3. character boundary
106-
let index = if !s.is_char_boundary(begin) { begin } else { end };
107-
// find the character
108-
let char_start = s.floor_char_boundary(index);
109-
// `char_start` must be less than len and a char boundary
110-
let ch = s[char_start..].chars().next().unwrap();
111-
let char_range = char_start..char_start + ch.len_utf8();
112-
panic!(
113-
"byte index {} is not a char boundary; it is inside {:?} (bytes {:?}) of `{}`{}",
114-
index, ch, char_range, s_trunc, ellipsis
115-
);
90+
// 1. begin is OOB.
91+
if begin > len {
92+
panic!("start byte index {begin} is out of bounds of `{s_trunc}`{ellipsis}");
93+
}
94+
95+
// 2. end is OOB.
96+
if end > len {
97+
panic!("end byte index {end} is out of bounds of `{s_trunc}`{ellipsis}");
98+
}
99+
100+
// 3. range is backwards.
101+
if begin > end {
102+
panic!("begin > end ({begin} > {end}) when slicing `{s_trunc}`{ellipsis}")
103+
}
104+
105+
// 4. begin is inside a character.
106+
if !s.is_char_boundary(begin) {
107+
let floor = s.floor_char_boundary(begin);
108+
let ceil = s.ceil_char_boundary(begin);
109+
let range = floor..ceil;
110+
let ch = s[floor..ceil].chars().next().unwrap();
111+
panic!(
112+
"start byte index {begin} is not a char boundary; it is inside {ch:?} (bytes {range:?}) of `{s_trunc}`{ellipsis}"
113+
)
114+
}
115+
116+
// 5. end is inside a character.
117+
if !s.is_char_boundary(end) {
118+
let floor = s.floor_char_boundary(end);
119+
let ceil = s.ceil_char_boundary(end);
120+
let range = floor..ceil;
121+
let ch = s[floor..ceil].chars().next().unwrap();
122+
panic!(
123+
"end byte index {end} is not a char boundary; it is inside {ch:?} (bytes {range:?}) of `{s_trunc}`{ellipsis}"
124+
)
125+
}
126+
127+
// 6. end is OOB and range is inclusive (end == len).
128+
// This test cannot be combined with 2. above because for cases like
129+
// `"abcαβγ"[4..9]` the error is that 4 is inside 'α', not that 9 is OOB.
130+
debug_assert_eq!(end, len);
131+
panic!("end byte index {end} is out of bounds of `{s_trunc}`{ellipsis}");
116132
}
117133

118134
impl str {

0 commit comments

Comments
 (0)