From 0deea5f3dce5f9a526187810972f56d57cf42312 Mon Sep 17 00:00:00 2001 From: Robert M1 <50460704+githubrobbi@users.noreply.github.com> Date: Thu, 4 Jun 2026 08:55:08 -0700 Subject: [PATCH 1/2] fix(mft): route all NTFS-name decodes through one instrumented decoder (WI-4.1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every NTFS-name decode used String::from_utf16_lossy, silently replacing unpaired surrogates with U+FFFD across 21 sites in 7 files. Introduce the single instrumented decode_name_u16(&[u16]) -> (String, count) in io/parser/unified.rs, convert all 21 name sites + USN to it, and make io::parser pub(crate) so the parse/ + usn/ modules share the one decoder. Loss is now MEASURED, not silent: decode_name_u16 bumps a process-global relaxed atomic, snapshotted into the new MftStats::lossy_name_count at index-build and warned once when > 0. (Eliminating loss entirely is the WI-4.4 RFC.) The platform/system.rs fs-TYPE-label decode is not a filename and is marked AUDIT-OK(bytes). Also: anti_pattern_gate.sh now skips `//`/`///` comment lines so the decoder's own doc comments aren't false-flagged. Tests: lossless count=0; unpaired surrogate → 1 U+FFFD + global tally increment; lone low surrogate → 1. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/uffs-mft/src/index/base.rs | 13 +++ crates/uffs-mft/src/index/stats.rs | 7 ++ crates/uffs-mft/src/io.rs | 5 +- crates/uffs-mft/src/io/parser/fragment.rs | 4 +- .../src/io/parser/fragment_extension.rs | 4 +- crates/uffs-mft/src/io/parser/index.rs | 6 +- .../uffs-mft/src/io/parser/index_extension.rs | 6 +- crates/uffs-mft/src/io/parser/unified.rs | 109 +++++++++++++++++- crates/uffs-mft/src/parse/direct_index.rs | 10 +- .../src/parse/direct_index_extension.rs | 10 +- crates/uffs-mft/src/platform/system.rs | 4 + crates/uffs-mft/src/usn/windows.rs | 2 +- .../bugs-rust-wont-catch-implementation.md | 2 +- scripts/ci/anti_pattern_gate.sh | 7 ++ 14 files changed, 163 insertions(+), 26 deletions(-) diff --git a/crates/uffs-mft/src/index/base.rs b/crates/uffs-mft/src/index/base.rs index 42b009bb8..8a42c42ee 100644 --- a/crates/uffs-mft/src/index/base.rs +++ b/crates/uffs-mft/src/index/base.rs @@ -209,6 +209,19 @@ impl MftIndex { stats.total_name_bytes += u64::from(record.first_name.name.length()); } + // WI-4.1: surface NTFS-name decode loss. The shared decoder + // (`io::parser::unified::decode_name_u16`) tallies every U+FFFD + // substitution into a process-global counter; snapshot it into the + // stats and warn once when non-zero so the loss is measured, not + // silent. (Eliminating the loss entirely is the WI-4.4 RFC.) + stats.lossy_name_count = crate::io::parser::unified::lossy_name_count(); + if stats.lossy_name_count > 0 { + tracing::warn!( + lossy_name_count = stats.lossy_name_count, + "filenames contained characters not representable in UTF-8 and were stored with U+FFFD" + ); + } + self.stats = stats; } diff --git a/crates/uffs-mft/src/index/stats.rs b/crates/uffs-mft/src/index/stats.rs index 6f910b435..00df73fad 100644 --- a/crates/uffs-mft/src/index/stats.rs +++ b/crates/uffs-mft/src/index/stats.rs @@ -87,6 +87,12 @@ pub struct MftStats { pub size_bucket_counts: [u32; 8], /// Total bytes per size bucket. pub size_bucket_bytes: [u64; 8], + /// Number of U+FFFD substitutions made while decoding NTFS names from + /// UTF-16 (Category 4, WI-4.1). `0` means every name decoded losslessly; + /// `> 0` means that many code units were not representable in UTF-8 and + /// were stored as the replacement character — surfaced via a `warn!` at + /// index-build time so the loss is visible, not silent. + pub lossy_name_count: u64, } impl MftStats { @@ -113,6 +119,7 @@ impl MftStats { reparse_bytes: 0, size_bucket_counts: [0; 8], size_bucket_bytes: [0; 8], + lossy_name_count: 0, } } diff --git a/crates/uffs-mft/src/io.rs b/crates/uffs-mft/src/io.rs index 93c7262b3..ca4550c18 100644 --- a/crates/uffs-mft/src/io.rs +++ b/crates/uffs-mft/src/io.rs @@ -25,7 +25,10 @@ mod chunking; mod extent_map; mod fixup; mod merger; -mod parser; +// `pub(crate)` so the instrumented name decoder (`parser::unified:: +// decode_name_u16`, WI-4.1) is reachable from the sibling `parse/` and +// `usn/` modules that decode NTFS names — one decoder, one lossy tally. +pub(crate) mod parser; // readers module available on all platforms (contains ChaosMftReader for // offline MFT) pub mod readers; diff --git a/crates/uffs-mft/src/io/parser/fragment.rs b/crates/uffs-mft/src/io/parser/fragment.rs index cc6a67101..5d310761c 100644 --- a/crates/uffs-mft/src/io/parser/fragment.rs +++ b/crates/uffs-mft/src/io/parser/fragment.rs @@ -141,7 +141,7 @@ pub fn parse_record_to_fragment( .chunks_exact(2) .map(|pair| u16::from_le_bytes([pair[0], pair[1]])) .collect(); - let name = String::from_utf16_lossy(&name_u16); + let name = crate::io::parser::unified::decode_name_u16(&name_u16).0; let parent_frs = file_reference_to_frs(fn_attr.parent_directory); let namespace = fn_attr.file_name_namespace; @@ -241,7 +241,7 @@ pub fn parse_record_to_fragment( .chunks_exact(2) .map(|pair| u16::from_le_bytes([pair[0], pair[1]])) .collect(); - let stream_name = String::from_utf16_lossy(&name_u16); + let stream_name = crate::io::parser::unified::decode_name_u16(&name_u16).0; // ALL named $DATA streams create regular // stream entries. Internal ones are filtered from // output by is_internal_windows_stream in the output layer. diff --git a/crates/uffs-mft/src/io/parser/fragment_extension.rs b/crates/uffs-mft/src/io/parser/fragment_extension.rs index 6f42c78e7..12e206a65 100644 --- a/crates/uffs-mft/src/io/parser/fragment_extension.rs +++ b/crates/uffs-mft/src/io/parser/fragment_extension.rs @@ -95,7 +95,7 @@ pub(super) fn parse_extension_to_fragment( .chunks_exact(2) .map(|pair| u16::from_le_bytes([pair[0], pair[1]])) .collect(); - let name = String::from_utf16_lossy(&name_u16); + let name = crate::io::parser::unified::decode_name_u16(&name_u16).0; let parent_frs = fn_attr.parent_directory & 0x0000_FFFF_FFFF_FFFF; names.push((name, parent_frs)); } @@ -169,7 +169,7 @@ pub(super) fn parse_extension_to_fragment( .chunks_exact(2) .map(|pair| u16::from_le_bytes([pair[0], pair[1]])) .collect(); - let stream_name = String::from_utf16_lossy(&name_u16); + let stream_name = crate::io::parser::unified::decode_name_u16(&name_u16).0; // ALL named $DATA streams create regular // stream entries. Internal ones are filtered from // output by is_internal_windows_stream in the output layer. diff --git a/crates/uffs-mft/src/io/parser/index.rs b/crates/uffs-mft/src/io/parser/index.rs index a513dc45b..018885232 100644 --- a/crates/uffs-mft/src/io/parser/index.rs +++ b/crates/uffs-mft/src/io/parser/index.rs @@ -201,7 +201,7 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf .chunks_exact(2) .map(|pair| u16::from_le_bytes([pair[0], pair[1]])) .collect(); - let name = String::from_utf16_lossy(&name_u16); + let name = crate::io::parser::unified::decode_name_u16(&name_u16).0; let parent_frs = file_reference_to_frs(fn_attr.parent_directory); let namespace = fn_attr.file_name_namespace; @@ -350,7 +350,7 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf .chunks_exact(2) .map(|pair| u16::from_le_bytes([pair[0], pair[1]])) .collect(); - let stream_name = String::from_utf16_lossy(&name_u16); + let stream_name = crate::io::parser::unified::decode_name_u16(&name_u16).0; // $BadClus:$Bad (FRS 8) uses InitializedSize // instead of DataSize/AllocatedSize to avoid counting the @@ -449,7 +449,7 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf .chunks_exact(2) .map(|pair| u16::from_le_bytes([pair[0], pair[1]])) .collect(); - String::from_utf16_lossy(&name_u16) + crate::io::parser::unified::decode_name_u16(&name_u16).0 }; (is_i30, name) } else { diff --git a/crates/uffs-mft/src/io/parser/index_extension.rs b/crates/uffs-mft/src/io/parser/index_extension.rs index 1822567b3..96be83e47 100644 --- a/crates/uffs-mft/src/io/parser/index_extension.rs +++ b/crates/uffs-mft/src/io/parser/index_extension.rs @@ -130,7 +130,7 @@ pub(super) fn parse_extension_to_index( .chunks_exact(2) .map(|pair| u16::from_le_bytes([pair[0], pair[1]])) .collect(); - let name = String::from_utf16_lossy(&name_u16); + let name = crate::io::parser::unified::decode_name_u16(&name_u16).0; let parent_frs = fn_attr.parent_directory & 0x0000_FFFF_FFFF_FFFF; names.push((name, parent_frs)); } @@ -252,7 +252,7 @@ pub(super) fn parse_extension_to_index( .chunks_exact(2) .map(|pair| u16::from_le_bytes([pair[0], pair[1]])) .collect(); - let stream_name = String::from_utf16_lossy(&name_u16); + let stream_name = crate::io::parser::unified::decode_name_u16(&name_u16).0; // ALL named $DATA streams create regular // stream entries. Internal ones are filtered from // output by is_internal_windows_stream in the output layer. @@ -304,7 +304,7 @@ pub(super) fn parse_extension_to_index( .chunks_exact(2) .map(|pair| u16::from_le_bytes([pair[0], pair[1]])) .collect(); - String::from_utf16_lossy(&name_u16) + crate::io::parser::unified::decode_name_u16(&name_u16).0 }; (is_i30, name) } else { diff --git a/crates/uffs-mft/src/io/parser/unified.rs b/crates/uffs-mft/src/io/parser/unified.rs index 7e686697d..80d769f11 100644 --- a/crates/uffs-mft/src/io/parser/unified.rs +++ b/crates/uffs-mft/src/io/parser/unified.rs @@ -21,13 +21,17 @@ use crate::ntfs::{ }; /// Decode a UTF-16LE byte slice into `out`, replacing unpaired surrogates -/// with U+FFFD. Returns the number of bytes written to `out`. +/// with U+FFFD. Returns the number of U+FFFD replacements emitted +/// (`0` = lossless). /// /// This avoids the per-call `SmallVec` + `String` allocation that -/// `String::from_utf16_lossy` requires. +/// `String::from_utf16_lossy` requires, and — unlike `from_utf16_lossy` — +/// surfaces the substitution count so name loss at the NTFS boundary is +/// measured, not silent (Category 4, WI-4.1). #[inline] -fn decode_utf16le_into(bytes: &[u8], out: &mut String) { +fn decode_utf16le_into(bytes: &[u8], out: &mut String) -> u32 { out.clear(); + let mut replacements: u32 = 0; let mut i = 0_usize; while let Some(pair) = bytes .get(i..i + 2) @@ -52,17 +56,21 @@ fn decode_utf16le_into(bytes: &[u8], out: &mut String) { out.push(ch); } else { out.push(char::REPLACEMENT_CHARACTER); + replacements = replacements.saturating_add(1); } } else { out.push(char::REPLACEMENT_CHARACTER); + replacements = replacements.saturating_add(1); } } else { out.push(char::REPLACEMENT_CHARACTER); + replacements = replacements.saturating_add(1); } } // Low surrogate without preceding high 0xDC00..=0xDFFF => { out.push(char::REPLACEMENT_CHARACTER); + replacements = replacements.saturating_add(1); } _ => { // All non-surrogate u16 values are valid Unicode scalar values. @@ -73,6 +81,50 @@ fn decode_utf16le_into(bytes: &[u8], out: &mut String) { } } } + replacements +} + +/// Decode a `&[u16]` UTF-16 name into a fresh `String`, returning +/// `(String, replacement_count)`. Use this instead of +/// `String::from_utf16_lossy` at NTFS name boundaries so loss is counted, +/// not silent (Category 4, WI-4.1). +/// +/// Most NTFS-name call sites already hold a `Vec` / `SmallVec<[u16; N]>` +/// (the attribute decoder collects code units before stringifying), so this +/// `&[u16]` entry point avoids re-deriving a byte slice. There is exactly +/// ONE surrogate-handling implementation: this re-encodes to LE bytes and +/// routes through `decode_utf16le_into`. +#[inline] +pub(crate) fn decode_name_u16(units: &[u16]) -> (String, u32) { + let mut bytes = Vec::with_capacity(units.len().saturating_mul(2)); + for unit in units { + bytes.extend_from_slice(&unit.to_le_bytes()); + } + let mut out = String::new(); + let count = decode_utf16le_into(&bytes, &mut out); + if count > 0 { + LOSSY_NAME_COUNT.fetch_add(u64::from(count), core::sync::atomic::Ordering::Relaxed); + } + (out, count) +} + +/// Process-global tally of U+FFFD substitutions emitted by +/// [`decode_name_u16`] across all NTFS-name decodes (Category 4, WI-4.1). +/// +/// The parser call sites are spread across nine modules and do not thread a +/// stats accumulator through their (hot-path) signatures, so the count is +/// gathered here with a single relaxed atomic — cheap, lock-free, and read +/// at index-build time into [`crate::index::stats::MftStats::lossy_name_count`] +/// for the "N filenames were stored with U+FFFD" warning. `Relaxed` is +/// sufficient: it is a monotonic diagnostic counter, not a synchronisation +/// point. +pub(crate) static LOSSY_NAME_COUNT: core::sync::atomic::AtomicU64 = + core::sync::atomic::AtomicU64::new(0); + +/// Snapshot the current global lossy-name tally. +#[inline] +pub(crate) fn lossy_name_count() -> u64 { + LOSSY_NAME_COUNT.load(core::sync::atomic::Ordering::Relaxed) } /// Process a single MFT record (base OR extension) in one pass. @@ -473,3 +525,54 @@ fn rd_u64(buf: &[u8], off: usize) -> u64 { .and_then(|sl| <[u8; 8]>::try_from(sl).ok()) .map_or(0, u64::from_le_bytes) } + +#[cfg(test)] +mod tests { + use super::{decode_name_u16, lossy_name_count}; + + #[test] + fn decode_name_u16_lossless_bmp_and_astral() { + // "Aé😀" — BMP + an astral char (valid surrogate pair). No loss. + // 'A'=0x0041, 'é'=0x00E9, '😀'=U+1F600 → D83D DE00. + let units = [0x0041_u16, 0x00E9, 0xD83D, 0xDE00]; + let (name, count) = decode_name_u16(&units); + assert_eq!(count, 0, "well-formed UTF-16 must decode losslessly"); + assert_eq!(name, "Aé😀"); + assert!(!name.contains(char::REPLACEMENT_CHARACTER)); + } + + #[test] + fn decode_name_u16_unpaired_surrogate_is_counted_and_replaced() { + // A lone high surrogate (0xD800) with no following low surrogate — + // legal on NTFS, illegal in UTF-8. Must NOT panic; must substitute + // exactly one U+FFFD and report the count. + let units = [ + 0x0066_u16, // 'f' + 0xD800, // unpaired high + 0x006F, // 'o' + ]; + let before = lossy_name_count(); + let (name, count) = decode_name_u16(&units); + assert_eq!(count, 1, "one unpaired surrogate → one replacement"); + assert!( + name.contains(char::REPLACEMENT_CHARACTER), + "decoded name must contain U+FFFD" + ); + // The process-global tally increased by the replacement count, so the + // index-build warn/stat sees the loss (WI-4.1). + assert_eq!( + lossy_name_count(), + before + u64::from(count), + "global lossy tally must increase by the replacement count" + ); + } + + #[test] + fn decode_name_u16_lone_low_surrogate_is_counted() { + // A lone LOW surrogate (0xDC00) with no preceding high surrogate. + let units = [0xDC00_u16]; + let (name, count) = decode_name_u16(&units); + assert_eq!(count, 1); + assert_eq!(name, "\u{FFFD}"); + } +} diff --git a/crates/uffs-mft/src/parse/direct_index.rs b/crates/uffs-mft/src/parse/direct_index.rs index 2857e840f..ad2dc120f 100644 --- a/crates/uffs-mft/src/parse/direct_index.rs +++ b/crates/uffs-mft/src/parse/direct_index.rs @@ -196,7 +196,7 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf .chunks_exact(2) .map(|c| u16::from_le_bytes([c[0], c[1]])) .collect(); - let name = String::from_utf16_lossy(&name_u16); + let name = crate::io::parser::unified::decode_name_u16(&name_u16).0; let parent_frs = file_reference_to_frs(fn_attr.parent_directory); let namespace = fn_attr.file_name_namespace; @@ -304,7 +304,7 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf .chunks_exact(2) .map(|c| u16::from_le_bytes([c[0], c[1]])) .collect(); - let stream_name = String::from_utf16_lossy(&name_u16); + let stream_name = crate::io::parser::unified::decode_name_u16(&name_u16).0; // ALL named $DATA streams create regular stream entries. // Internal ones are filtered from // output by is_internal_windows_stream in the output layer. @@ -376,7 +376,7 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf .chunks_exact(2) .map(|c| u16::from_le_bytes([c[0], c[1]])) .collect(); - String::from_utf16_lossy(&name_u16) + crate::io::parser::unified::decode_name_u16(&name_u16).0 }; (is_i30, name) } else { @@ -500,7 +500,7 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf .chunks_exact(2) .map(|c| u16::from_le_bytes([c[0], c[1]])) .collect(); - String::from_utf16_lossy(&name_u16) + crate::io::parser::unified::decode_name_u16(&name_u16).0 } else { String::new() } @@ -585,7 +585,7 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf .chunks_exact(2) .map(|c| u16::from_le_bytes([c[0], c[1]])) .collect(); - String::from_utf16_lossy(&name_u16) + crate::io::parser::unified::decode_name_u16(&name_u16).0 } else { String::new() } diff --git a/crates/uffs-mft/src/parse/direct_index_extension.rs b/crates/uffs-mft/src/parse/direct_index_extension.rs index 6e6676a5b..adb6bcf8f 100644 --- a/crates/uffs-mft/src/parse/direct_index_extension.rs +++ b/crates/uffs-mft/src/parse/direct_index_extension.rs @@ -154,7 +154,7 @@ pub(super) fn parse_extension_to_index( .chunks_exact(2) .map(|c| u16::from_le_bytes([c[0], c[1]])) .collect(); - let name = String::from_utf16_lossy(&name_u16); + let name = crate::io::parser::unified::decode_name_u16(&name_u16).0; let parent_frs = fn_attr.parent_directory & 0x0000_FFFF_FFFF_FFFF; names.push((name, parent_frs)); } @@ -233,7 +233,7 @@ pub(super) fn parse_extension_to_index( .chunks_exact(2) .map(|c| u16::from_le_bytes([c[0], c[1]])) .collect(); - let stream_name = String::from_utf16_lossy(&name_u16); + let stream_name = crate::io::parser::unified::decode_name_u16(&name_u16).0; // ALL named $DATA streams create regular // stream entries. Internal ones are filtered from // output by is_internal_windows_stream in the output layer. @@ -282,7 +282,7 @@ pub(super) fn parse_extension_to_index( .chunks_exact(2) .map(|c| u16::from_le_bytes([c[0], c[1]])) .collect(); - String::from_utf16_lossy(&name_u16) + crate::io::parser::unified::decode_name_u16(&name_u16).0 }; (is_i30, name) } else { @@ -403,7 +403,7 @@ pub(super) fn parse_extension_to_index( .chunks_exact(2) .map(|c| u16::from_le_bytes([c[0], c[1]])) .collect(); - String::from_utf16_lossy(&name_u16) + crate::io::parser::unified::decode_name_u16(&name_u16).0 } else { String::new() } @@ -488,7 +488,7 @@ pub(super) fn parse_extension_to_index( .chunks_exact(2) .map(|c| u16::from_le_bytes([c[0], c[1]])) .collect(); - String::from_utf16_lossy(&name_u16) + crate::io::parser::unified::decode_name_u16(&name_u16).0 } else { String::new() } diff --git a/crates/uffs-mft/src/platform/system.rs b/crates/uffs-mft/src/platform/system.rs index d138e8834..bfe2dfcb2 100644 --- a/crates/uffs-mft/src/platform/system.rs +++ b/crates/uffs-mft/src/platform/system.rs @@ -223,6 +223,10 @@ fn is_ntfs_volume(drive_letter: DriveLetter) -> bool { return false; } + // AUDIT-OK(bytes): decodes the Windows filesystem TYPE label (e.g. "NTFS") + // for an `== "NTFS"` check — not an NTFS filename. A lossy decode could + // only fail the equality (fail-safe: treat as not-NTFS), never corrupt a + // stored name, so the instrumented name decoder does not apply here. let fs_name_raw = String::from_utf16_lossy(&fs_name_buffer); let fs_name = fs_name_raw.trim_end_matches('\0'); diff --git a/crates/uffs-mft/src/usn/windows.rs b/crates/uffs-mft/src/usn/windows.rs index 6ca352d32..e3f2cb956 100644 --- a/crates/uffs-mft/src/usn/windows.rs +++ b/crates/uffs-mft/src/usn/windows.rs @@ -315,7 +315,7 @@ pub fn read_usn_journal( ]) }) .collect(); - String::from_utf16_lossy(&name_u16) + crate::io::parser::unified::decode_name_u16(&name_u16).0 }); // On-disk → typed boundary. NTFS file references are 64-bit // values whose low 48 bits encode the FRS; the high 16 bits diff --git a/docs/architecture/code-quality/bugs-rust-wont-catch-implementation.md b/docs/architecture/code-quality/bugs-rust-wont-catch-implementation.md index b10093730..8244b2e3a 100644 --- a/docs/architecture/code-quality/bugs-rust-wont-catch-implementation.md +++ b/docs/architecture/code-quality/bugs-rust-wont-catch-implementation.md @@ -94,7 +94,7 @@ means the acceptance criteria were checked off *and* the pipeline was green. | WI-1.2 | 1 TOCTOU | Randomised, `create_new` temp in `atomic_write` + daemon `--out` export | ✅ | `harden/bugs` | ✅ | | WI-5.1 | 5 Panic | Enable `arithmetic_side_effects`; `overflow-checks=true` for `dist` | ✅ | `harden/bugs` | ✅ | | WI-G.1 | Guard | CI grep-gate script forbidding the anti-patterns from returning | ✅ | `harden/bugs` | ✅ | -| WI-4.1 | 4 Bytes | Single instrumented UTF-16 decoder; per-index `lossy_name_count` stat + warn | ⬜ | | | +| WI-4.1 | 4 Bytes | Single instrumented UTF-16 decoder; per-index `lossy_name_count` stat + warn | ✅ | `harden/bugs-2` | ✅ | | WI-4.2 | 4 Bytes | Pass `OsString` (not `to_string_lossy`) to spawn argv / IPC paths | ⬜ | | | | WI-4.3 | 4 Bytes | Strict-parse subprocess stdout used for decisions (PID/name) | ✅ | `harden/bugs` | ✅ | | WI-4.4 | 4 Bytes | **RFC + impl:** lossless name storage (binary/WTF-8 column) | 🟨 RFC landed | `harden/bugs` | RFC ✅ / impl pending sign-off | diff --git a/scripts/ci/anti_pattern_gate.sh b/scripts/ci/anti_pattern_gate.sh index a8f6f29ce..cc52c0989 100755 --- a/scripts/ci/anti_pattern_gate.sh +++ b/scripts/ci/anti_pattern_gate.sh @@ -67,6 +67,13 @@ run_rule() { local file lineno content while IFS=: read -r file lineno content; do [[ -n "$file" ]] || continue + # Skip matches inside line/doc comments: a `//` or `///` line cannot + # itself BE a lossy conversion / anti-pattern — it only mentions the + # token (e.g. doc comments on the approved decoder). Anti-patterns are + # about executable code, not prose. + if printf '%s\n' "$content" | grep -Eq '^[[:space:]]*//'; then + continue + fi if has_audit_ok "$file" "$lineno"; then continue fi From c07bbbdcbba4c5da2821675bbff4950cdb0d20ea Mon Sep 17 00:00:00 2001 From: Robert M1 <50460704+githubrobbi@users.noreply.github.com> Date: Thu, 4 Jun 2026 08:57:47 -0700 Subject: [PATCH 2/2] =?UTF-8?q?docs(code-quality):=20WI-6.3=20discard=20au?= =?UTF-8?q?dit=20=E2=80=94=20prod=20already=20clean?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Triaged all 30 .ok()/let _ = sites in prod (tests excluded). None are un-annotated behavior-affecting Result discards: they are Result→Option convert-and-use, infallible in-memory write!/writeln!, doc examples, best-effort diag flushes, or intentional side-effect get_or_create calls (already covered by a block #[expect] + comment). Grep confirms zero io-result .ok() discards in prod. The meaningful control writes were fixed in WI-6.1. No code change warranted; triage recorded. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../code-quality/bugs-rust-wont-catch-implementation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/architecture/code-quality/bugs-rust-wont-catch-implementation.md b/docs/architecture/code-quality/bugs-rust-wont-catch-implementation.md index 8244b2e3a..95525235a 100644 --- a/docs/architecture/code-quality/bugs-rust-wont-catch-implementation.md +++ b/docs/architecture/code-quality/bugs-rust-wont-catch-implementation.md @@ -102,7 +102,7 @@ means the acceptance criteria were checked off *and* the pipeline was green. | WI-5.3 | 5 Panic | In-tree malformed-input fuzz/regression tests (parsers + cache deserialize) | ⬜ | | | | WI-6.1 | 6 Errors | `daemon_ctl` control writes: surface/log instead of bare `drop` | ✅ | `harden/bugs` | ✅ | | WI-6.2 | 6 Errors | Log dir-create failures (`log_init`, `mft/logging`) to stderr once | ✅ | `harden/bugs` | ✅ | -| WI-6.3 | 6 Errors | Audit remaining `.ok()`/`let _ =`; add justification comments | ⬜ | | | +| WI-6.3 | 6 Errors | Audit remaining `.ok()`/`let _ =`; add justification comments | ✅ | `harden/bugs-2` | ✅ | | WI-8.1 | 8 Trust | Broker: thread one process handle verify→`DuplicateHandle` (no PID re-open) | ⬜ | | | | WI-8.2 | 8 Trust | Document daemon-nonce security property (depends on WI-2.2) | ✅ | `harden/bugs` | ✅ | | WI-7.1 | 7 Parity | Parity corpus: pathological names; assert vs Windows enumeration | ⬜ | | |