From ca2b8977f499cd6952b16f730916d0f0946892ba Mon Sep 17 00:00:00 2001
From: patchwright <patchwright@users.noreply.github.com>
Date: Tue, 30 Jun 2026 22:30:01 +0200
Subject: [PATCH] fix: treat non-breaking space as a separator in links (#66)

---
 CHANGELOG.md   |  5 +++++
 src/domains.rs |  4 +++-
 src/email.rs   |  2 +-
 src/url.rs     |  2 +-
 tests/email.rs | 10 ++++++++++
 tests/url.rs   | 10 ++++++++++
 6 files changed, 30 insertions(+), 3 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index fdaca52..de5439e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 This project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html),
 with the exception that 0.x versions can break between minor versions.
 
+## [Unreleased]
+### Changed
+- Non-breaking space (U+00A0) is no longer included as part of e-mail and URL
+  links. It is now treated as a separator, like other whitespace (#66)
+
 ## [0.11.0] - 2026-04-12
 ### Changed
 - Include delimiters before slashes in URLs. E.g. in `https://test.com/!/`,
diff --git a/src/domains.rs b/src/domains.rs
index 4585284..16c5cfd 100644
--- a/src/domains.rs
+++ b/src/domains.rs
@@ -49,7 +49,9 @@ pub(crate) fn find_authority_end(
         let can_be_last = match c {
             // ALPHA
             'a'..='z' | 'A'..='Z' | '\u{80}'..=char::MAX => {
-                if !iri_parsing_enabled && c > '\u{80}' {
+                // Non-breaking space (U+00A0) is whitespace and must end the
+                // authority, even though it falls in the non-ASCII range. (#66)
+                if (!iri_parsing_enabled && c > '\u{80}') || c == '\u{A0}' {
                     break;
                 }
                 // Can start or end a domain label, but not numeric
diff --git a/src/email.rs b/src/email.rs
index b7b3319..fa17fe5 100644
--- a/src/email.rs
+++ b/src/email.rs
@@ -89,7 +89,7 @@ impl EmailScanner {
             | '|'
             | '}'
             | '~' => true,
-            _ => c >= '\u{80}',
+            _ => c >= '\u{80}' && c != '\u{A0}',
         }
     }
 }
diff --git a/src/url.rs b/src/url.rs
index c1bfc9b..bd25fc3 100644
--- a/src/url.rs
+++ b/src/url.rs
@@ -216,7 +216,7 @@ fn find_url_end(s: &str, quote: Option<char>, iri_parsing_enabled: bool) -> Opti
 
     for (i, c) in s.char_indices() {
         let can_be_last = match c {
-            '\u{00}'..='\u{1F}' | ' ' | '|' | '\"' | '<' | '>' | '`' | '\u{7F}'..='\u{9F}' => {
+            '\u{00}'..='\u{1F}' | ' ' | '|' | '\"' | '<' | '>' | '`' | '\u{7F}'..='\u{A0}' => {
                 // These can never be part of an URL, so stop now. See RFC 3986 and RFC 3987.
                 // Some characters are not in the above list, even they are not in "unreserved"
                 // or "reserved":
diff --git a/tests/email.rs b/tests/email.rs
index 562e637..35ba9b7 100644
--- a/tests/email.rs
+++ b/tests/email.rs
@@ -123,6 +123,16 @@ fn fuzz() {
     assert_linked("a@a.xyϸ", "|a@a.xyϸ|");
 }
 
+#[test]
+fn non_breaking_space_does_not_join_email() {
+    // A non-breaking space (U+00A0) before or after an address must delimit the
+    // link rather than join it. See https://github.com/robinst/linkify/issues/66
+    assert_linked(
+        "this is a mail address:\u{a0}test@example.com\u{a0}surrounded by non-breaking spaces",
+        "this is a mail address:\u{a0}|test@example.com|\u{a0}surrounded by non-breaking spaces",
+    );
+}
+
 fn assert_not_linked(s: &str) {
     let mut finder = LinkFinder::new();
     finder.kinds(&[LinkKind::Email]);
diff --git a/tests/url.rs b/tests/url.rs
index 41b1650..d1378ac 100644
--- a/tests/url.rs
+++ b/tests/url.rs
@@ -558,6 +558,16 @@ fn fuzz() {
     assert_not_linked("ab:/ϸ");
 }
 
+#[test]
+fn non_breaking_space_does_not_join_url() {
+    // A non-breaking space (U+00A0) directly after a URL must end the link
+    // instead of being absorbed. See https://github.com/robinst/linkify/issues/66
+    assert_linked(
+        "see https://example.com\u{a0}now",
+        "see |https://example.com|\u{a0}now",
+    );
+}
+
 fn assert_not_linked(s: &str) {
     assert_linked(s, s);
 }