From c1c66e7827236268897bbddb30c0dc38437e5e75 Mon Sep 17 00:00:00 2001 From: Elias Rohrer Date: Tue, 5 May 2026 20:10:01 +0200 Subject: [PATCH 1/2] Strip Unicode `Cf` characters in `PrintableString` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `PrintableString` is the sanitiser LDK uses to render untrusted strings (node aliases, BOLT-12 invoice / offer text, `UntrustedString`, LSPS messages, `lightning-invoice` descriptions) to logs and UI. It only replaced `char::is_control` matches (Unicode general category `Cc`) with U+FFFD, leaving the entire `Cf` (Format) category untouched. That is the exact category covering the bidirectional override / isolate codepoints (U+202A..U+202E, U+2066..U+2069) and zero-width characters (U+200B..U+200D, U+FEFF) behind the "Trojan Source" attack family (CVE-2021-42574): a peer can set its alias / invoice description / offer fields to e.g. `safe\u{202E}cipsxe.exe`, which previously passed through verbatim while a human reader sees `safeexe.cips` — defeating the threat model `PrintableString` exists to defend against. Replace `Cf` codepoints alongside `Cc` ones. The `Cf` ranges are inlined as a `matches!` table sourced from Unicode 16.0 to keep the change `no_std`-friendly with no new dependencies. Co-Authored-By: HAL 9000 --- lightning-types/src/string.rs | 54 ++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/lightning-types/src/string.rs b/lightning-types/src/string.rs index ae5395a5289..58fd2cf25ab 100644 --- a/lightning-types/src/string.rs +++ b/lightning-types/src/string.rs @@ -31,7 +31,11 @@ impl<'a> fmt::Display for PrintableString<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { use core::fmt::Write; for c in self.0.chars() { - let c = if c.is_control() { core::char::REPLACEMENT_CHARACTER } else { c }; + let c = if c.is_control() || is_format_char(c) { + core::char::REPLACEMENT_CHARACTER + } else { + c + }; f.write_char(c)?; } @@ -39,6 +43,38 @@ impl<'a> fmt::Display for PrintableString<'a> { } } +// Codepoints in Unicode general category `Cf` (Format), per Unicode 16.0. These are not matched +// by `char::is_control` (which only covers `Cc`), but include the bidirectional override / isolate +// controls (e.g. U+202E RLO) and zero-width characters behind the "Trojan Source" attack family +// (CVE-2021-42574), where an attacker-supplied string renders to a human reader as something other +// than its byte content. Strip them alongside `Cc` characters when sanitising untrusted input. +fn is_format_char(c: char) -> bool { + matches!( + c as u32, + 0x00AD + | 0x0600..=0x0605 + | 0x061C + | 0x06DD + | 0x070F + | 0x0890..=0x0891 + | 0x08E2 + | 0x180E + | 0x200B..=0x200F + | 0x202A..=0x202E + | 0x2060..=0x2064 + | 0x2066..=0x206F + | 0xFEFF + | 0xFFF9..=0xFFFB + | 0x110BD + | 0x110CD + | 0x13430..=0x13440 + | 0x1BCA0..=0x1BCA3 + | 0x1D173..=0x1D17A + | 0xE0001 + | 0xE0020..=0xE007F + ) +} + #[cfg(test)] mod tests { use super::PrintableString; @@ -50,4 +86,20 @@ mod tests { "I \u{1F496} LDK!\u{FFFD}\u{26A1}", ); } + + #[test] + fn sanitizes_unicode_bidi_override_characters() { + // U+202E RIGHT-TO-LEFT OVERRIDE and friends are Unicode general category + // `Cf` (Format), not `Cc` (Control). They enable "Trojan Source" / + // bidi-spoofing attacks where an attacker-supplied string (e.g. a node + // alias gossiped from a peer) renders to a human reader as something + // other than its byte content. `PrintableString` is the sanitiser used + // for exactly these untrusted strings, so it must replace them. + let rendered = format!("{}", PrintableString("safe\u{202E}cipsxe.exe")); + assert!( + !rendered.contains('\u{202E}'), + "PrintableString left a U+202E RLO override in its output: {:?}", + rendered + ); + } } From 907dd41d86acb57a3eec0c1c6583f2bf21e45339 Mon Sep 17 00:00:00 2001 From: Elias Rohrer Date: Wed, 6 May 2026 11:35:13 +0200 Subject: [PATCH 2/2] fixup! Strip Unicode `Cf` characters in `PrintableString` --- lightning-types/src/string.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lightning-types/src/string.rs b/lightning-types/src/string.rs index 58fd2cf25ab..5f131f5428a 100644 --- a/lightning-types/src/string.rs +++ b/lightning-types/src/string.rs @@ -67,7 +67,7 @@ fn is_format_char(c: char) -> bool { | 0xFFF9..=0xFFFB | 0x110BD | 0x110CD - | 0x13430..=0x13440 + | 0x13430..=0x1343F | 0x1BCA0..=0x1BCA3 | 0x1D173..=0x1D17A | 0xE0001 @@ -101,5 +101,12 @@ mod tests { "PrintableString left a U+202E RLO override in its output: {:?}", rendered ); + + // U+13440 is in the Egyptian Hieroglyph Format Controls block, but its + // general category is `Mn`, not `Cf`, so the `Cf` range ends at U+1343F. + assert_eq!( + format!("{}", PrintableString("x\u{1343F}y\u{13440}z")), + "x\u{FFFD}y\u{13440}z" + ); } }