Auto merge of #151664 - jhpratt:rollup-scDdbzg, r=jhpratt

bors · bors · commit b61425666243 · 2026-01-26T00:51:20.000Z
Rollup of 4 pull requests

Successful merges:

 - #150353 (refactor rustc-hash integration)
 - #151611 (Improve is_ascii performance on x86_64 with explicit SSE2 intrinsics)
 - #150705 (Add missing mut to pin.rs docs)
 - #151639 (Fix broken WASIp1 reference link)
diff --git a/compiler/rustc_data_structures/src/fx.rs b/compiler/rustc_data_structures/src/fx.rs
@@ -1,11 +1,9 @@
-use std::hash::BuildHasherDefault;
-
 pub use rustc_hash::{FxBuildHasher, FxHashMap, FxHashSet, FxHasher};
 
 pub type StdEntry<'a, K, V> = std::collections::hash_map::Entry<'a, K, V>;
 
-pub type FxIndexMap<K, V> = indexmap::IndexMap<K, V, BuildHasherDefault<FxHasher>>;
-pub type FxIndexSet<V> = indexmap::IndexSet<V, BuildHasherDefault<FxHasher>>;
+pub type FxIndexMap<K, V> = indexmap::IndexMap<K, V, FxBuildHasher>;
+pub type FxIndexSet<V> = indexmap::IndexSet<V, FxBuildHasher>;
 pub type IndexEntry<'a, K, V> = indexmap::map::Entry<'a, K, V>;
 pub type IndexOccupiedEntry<'a, K, V> = indexmap::map::OccupiedEntry<'a, K, V>;
 
diff --git a/compiler/rustc_data_structures/src/unord.rs b/compiler/rustc_data_structures/src/unord.rs
@@ -8,10 +8,10 @@ use std::hash::Hash;
 use std::iter::{Product, Sum};
 use std::ops::Index;
 
-use rustc_hash::{FxBuildHasher, FxHashMap, FxHashSet};
 use rustc_macros::{Decodable_NoContext, Encodable_NoContext};
 
 use crate::fingerprint::Fingerprint;
+use crate::fx::{FxBuildHasher, FxHashMap, FxHashSet};
 use crate::stable_hasher::{HashStable, StableCompare, StableHasher, ToStableHashKey};
 
 /// `UnordItems` is the order-less version of `Iterator`. It only contains methods
diff --git a/compiler/rustc_type_ir/src/data_structures/mod.rs b/compiler/rustc_type_ir/src/data_structures/mod.rs
@@ -1,11 +1,9 @@
-use std::hash::BuildHasherDefault;
-
 pub use ena::unify::{NoError, UnifyKey, UnifyValue};
-use rustc_hash::FxHasher;
+use rustc_hash::FxBuildHasher;
 pub use rustc_hash::{FxHashMap as HashMap, FxHashSet as HashSet};
 
-pub type IndexMap<K, V> = indexmap::IndexMap<K, V, BuildHasherDefault<FxHasher>>;
-pub type IndexSet<V> = indexmap::IndexSet<V, BuildHasherDefault<FxHasher>>;
+pub type IndexMap<K, V> = indexmap::IndexMap<K, V, FxBuildHasher>;
+pub type IndexSet<V> = indexmap::IndexSet<V, FxBuildHasher>;
 
 mod delayed_map;
 
diff --git a/library/core/src/pin.rs b/library/core/src/pin.rs
@@ -831,15 +831,13 @@
 //!     <code>fn get_pin_mut(self: [Pin]<[`&mut Self`]>) -> [Pin]<[`&mut T`]></code>.
 //!     Then we could do the following:
 //!     ```compile_fail
-//!     # use std::cell::RefCell;
-//!     # use std::pin::Pin;
-//!     fn exploit_ref_cell<T>(rc: Pin<&mut RefCell<T>>) {
+//!     fn exploit_ref_cell<T>(mut rc: Pin<&mut RefCell<T>>) {
 //!         // Here we get pinned access to the `T`.
 //!         let _: Pin<&mut T> = rc.as_mut().get_pin_mut();
 //!
 //!         // And here we have `&mut T` to the same data.
 //!         let shared: &RefCell<T> = rc.into_ref().get_ref();
-//!         let borrow = shared.borrow_mut();
+//!         let mut borrow = shared.borrow_mut();
 //!         let content = &mut *borrow;
 //!     }
 //!     ```
diff --git a/library/core/src/slice/ascii.rs b/library/core/src/slice/ascii.rs
@@ -460,56 +460,38 @@ const fn is_ascii(s: &[u8]) -> bool {
     )
 }
 
-/// Chunk size for vectorized ASCII checking (two 16-byte SSE registers).
+/// Chunk size for SSE2 vectorized ASCII checking (4x 16-byte loads).
 #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
-const CHUNK_SIZE: usize = 32;
+const SSE2_CHUNK_SIZE: usize = 64;
 
-/// SSE2 implementation using `_mm_movemask_epi8` (compiles to `pmovmskb`) to
-/// avoid LLVM's broken AVX-512 auto-vectorization of counting loops.
-///
-/// FIXME(llvm#176906): Remove this workaround once LLVM generates efficient code.
 #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
+#[inline]
 fn is_ascii_sse2(bytes: &[u8]) -> bool {
     use crate::arch::x86_64::{__m128i, _mm_loadu_si128, _mm_movemask_epi8, _mm_or_si128};
 
-    let mut i = 0;
-
-    while i + CHUNK_SIZE <= bytes.len() {
-        // SAFETY: We have verified that `i + CHUNK_SIZE <= bytes.len()`.
-        let ptr = unsafe { bytes.as_ptr().add(i) };
-
-        // Load two 16-byte chunks and combine them.
-        // SAFETY: We verified `i + 32 <= len`, so ptr is valid for 32 bytes.
-        // `_mm_loadu_si128` allows unaligned loads.
-        let chunk1 = unsafe { _mm_loadu_si128(ptr as *const __m128i) };
-        // SAFETY: Same as above - ptr.add(16) is within the valid 32-byte range.
-        let chunk2 = unsafe { _mm_loadu_si128(ptr.add(16) as *const __m128i) };
-
-        // OR them together - if any byte has the high bit set, the result will too.
-        // SAFETY: SSE2 is guaranteed by the cfg predicate.
-        let combined = unsafe { _mm_or_si128(chunk1, chunk2) };
-
-        // Create a mask from the MSBs of each byte.
-        // If any byte is >= 128, its MSB is 1, so the mask will be non-zero.
-        // SAFETY: SSE2 is guaranteed by the cfg predicate.
-        let mask = unsafe { _mm_movemask_epi8(combined) };
-
+    let (chunks, rest) = bytes.as_chunks::<SSE2_CHUNK_SIZE>();
+
+    for chunk in chunks {
+        let ptr = chunk.as_ptr();
+        // SAFETY: chunk is 64 bytes. SSE2 is baseline on x86_64.
+        let mask = unsafe {
+            let a1 = _mm_loadu_si128(ptr as *const __m128i);
+            let a2 = _mm_loadu_si128(ptr.add(16) as *const __m128i);
+            let b1 = _mm_loadu_si128(ptr.add(32) as *const __m128i);
+            let b2 = _mm_loadu_si128(ptr.add(48) as *const __m128i);
+            // OR all chunks - if any byte has high bit set, combined will too.
+            let combined = _mm_or_si128(_mm_or_si128(a1, a2), _mm_or_si128(b1, b2));
+            // Create a mask from the MSBs of each byte.
+            // If any byte is >= 128, its MSB is 1, so the mask will be non-zero.
+            _mm_movemask_epi8(combined)
+        };
         if mask != 0 {
             return false;
         }
-
-        i += CHUNK_SIZE;
-    }
-
-    // Handle remaining bytes with simple loop
-    while i < bytes.len() {
-        if !bytes[i].is_ascii() {
-            return false;
-        }
-        i += 1;
     }
 
-    true
+    // Handle remaining bytes
+    rest.iter().all(|b| b.is_ascii())
 }
 
 /// ASCII test optimized to use the `pmovmskb` instruction on `x86-64`.
@@ -529,7 +511,7 @@ const fn is_ascii(bytes: &[u8]) -> bool {
             is_ascii_simple(bytes)
         } else {
             // For small inputs, use usize-at-a-time processing to avoid SSE2 call overhead.
-            if bytes.len() < CHUNK_SIZE {
+            if bytes.len() < SSE2_CHUNK_SIZE {
                 let chunks = bytes.chunks_exact(USIZE_SIZE);
                 let remainder = chunks.remainder();
                 for chunk in chunks {
diff --git a/src/doc/rustc/src/platform-support/wasm32-wasip1.md b/src/doc/rustc/src/platform-support/wasm32-wasip1.md
@@ -20,7 +20,7 @@ focused on the Component Model-based definition of WASI. At this point the
 `wasm32-wasip1` Rust target is intended for historical compatibility with
 [WASIp1] set of syscalls.
 
-[WASIp1]: https://github.com/WebAssembly/WASI/tree/main/legacy/preview1
+[WASIp1]: https://github.com/WebAssembly/WASI/tree/wasi-0.1/preview1
 [Component Model]: https://github.com/webassembly/component-model
 
 Today the `wasm32-wasip1` target will generate core WebAssembly modules
diff --git a/tests/assembly-llvm/slice-is-ascii.rs b/tests/assembly-llvm/slice-is-ascii.rs
@@ -13,15 +13,15 @@
 /// Verify `is_ascii` generates efficient code on different architectures:
 ///
 /// - x86_64: Must NOT use `kshiftrd`/`kshiftrq` (broken AVX-512 auto-vectorization).
-///   The fix uses explicit SSE2 intrinsics (`pmovmskb`/`vpmovmskb`).
-///   See: https://github.com/llvm/llvm-project/issues/176906
+///   Good version uses explicit SSE2 intrinsics (`pmovmskb`/`vpmovmskb`).
 ///
 /// - loongarch64: Should use `vmskltz.b` instruction for the fast-path.
-///   This architecture still relies on LLVM auto-vectorization.
 
 // X86_64-LABEL: test_is_ascii
 // X86_64-NOT: kshiftrd
 // X86_64-NOT: kshiftrq
+// X86_64: {{vpor|por}}
+// X86_64: {{vpmovmskb|pmovmskb}}
 
 // LA64-LABEL: test_is_ascii
 // LA64: vmskltz.b