rust-lang · ehuss · Feb 13, 2026 · Feb 13, 2026 · Feb 13, 2026 · Feb 13, 2026
diff --git a/dev-guide/src/grammar.md b/dev-guide/src/grammar.md
@@ -39,19 +39,35 @@ Sequence ->
         (` `* AdornedExpr)* ` `* Cut
       | (` `* AdornedExpr)+
 
-AdornedExpr -> ExprRepeat Suffix? Footnote?
+AdornedExpr -> Prefix? Expr1 Quantifier? Suffix? Footnote?
+
+Prefix -> NegativeLookahead
+
+NegativeLookahead -> `!`
 
 Suffix -> ` _` <not underscore, unless in backtick>* `_`
 
 Footnote -> `[^` ~[`]` LF]+ `]`
 
-ExprRepeat ->
-      Expr1 `?`
-    | Expr1 `*?`
-    | Expr1 `*`
-    | Expr1 `+?`
-    | Expr1 `+`
-    | Expr1 `{` Range? `..` Range? `}`
+Quantifier ->
+      Optional
+    | Repeat
+    | RepeatNonGreedy
+    | RepeatPlus
+    | RepeatPlusNonGreedy
+    | RepeatRange
+
+Optional -> `?`
+
+Repeat -> `*`
+
+RepeatNonGreedy -> `*?`
+
+RepeatPlus -> `+`
+
+RepeatPlusNonGreedy -> `+?`
+
+RepeatRange -> `{` Range? `..` Range? `}`
 
 Range -> [0-9]+
 
@@ -66,7 +82,7 @@ Expr1 ->
     | Group
     | NegativeExpression
 
-Unicode -> `U+` [`A`-`Z` `0`-`9`]4..4
+Unicode -> `U+` [`A`-`Z` `0`-`9`]4..6
 
 NonTerminal -> Name
 
@@ -83,7 +99,11 @@ Characters ->
     | CharacterTerminal
     | CharacterName
 
-CharacterRange -> BACKTICK <any char> BACKTICK `-` BACKTICK <any char> BACKTICK
+CharacterRange -> Character `-` Character
+
+Character ->
+        BACKTICK <any char> BACKTICK
+      | Unicode
 
 CharacterTerminal -> Terminal
 
@@ -120,10 +140,11 @@ The general format is a series of productions separated by blank lines. The expr
 | Suffix | \_except \[LazyBooleanExpression\]\_  | Adds a suffix to the previous expression to provide an additional English description, rendered in subscript. This can contain limited Markdown, but try to avoid anything except basics like links. |
 | Footnote | \[^extern-safe\] | Adds a footnote, which can supply extra information that may be helpful to the user. The footnote itself should be defined outside of the code block like a normal Markdown footnote. |
 | Optional | Expr? | The preceding expression is optional. |
+| NegativeLookahead | !Expr | Matches if Expr does not follow, without consuming any input. |
 | Repeat | Expr* | The preceding expression is repeated 0 or more times. |
-| Repeat (non-greedy) | Expr*? | The preceding expression is repeated 0 or more times without being greedy. |
+| RepeatNonGreedy | Expr*? | The preceding expression is repeated 0 or more times without being greedy. |
 | RepeatPlus | Expr+ | The preceding expression is repeated 1 or more times. |
-| RepeatPlus (non-greedy) | Expr+? | The preceding expression is repeated 1 or more times without being greedy. |
+| RepeatPlusNonGreedy | Expr+? | The preceding expression is repeated 1 or more times without being greedy. |
 | RepeatRange | Expr{2..4} | The preceding expression is repeated between the range of times specified. Either bound can be excluded, which works just like Rust ranges. |
 
 ## Automatic linking

diff --git a/src/comments.md b/src/comments.md
@@ -3,34 +3,49 @@ r[comments]
 
 r[comments.syntax]
 ```grammar,lexer
-@root LINE_COMMENT ->
+@root COMMENT ->
+      LINE_COMMENT
+    | INNER_LINE_DOC
+    | OUTER_LINE_DOC
+    | INNER_BLOCK_DOC
+    | OUTER_BLOCK_DOC
+    | BLOCK_COMMENT
+
+LINE_COMMENT ->
       `//` (~[`/` `!` LF] | `//`) ~LF*
-    | `//`
+    | `//` EOF
+    | `//` _immediately followed by LF_
 
 BLOCK_COMMENT ->
-      `/*`
+      `/**/`
+    | `/***/`
+    | `/*`
+        ^
         ( ~[`*` `!`] | `**` | BLOCK_COMMENT_OR_DOC )
         ( BLOCK_COMMENT_OR_DOC | ~`*/` )*
       `*/`
-    | `/**/`
-    | `/***/`
 
-@root INNER_LINE_DOC ->
-    `//!` ~[LF CR]*
+INNER_LINE_DOC ->
+    `//!` ^ LINE_DOC_COMMENT_CONTENT (LF | EOF)
+
+LINE_DOC_COMMENT_CONTENT -> (!CR ~LF)*
 
 INNER_BLOCK_DOC ->
-    `/*!` ( BLOCK_COMMENT_OR_DOC | ~[`*/` CR] )* `*/`
+    `/*!` ^ ( BLOCK_COMMENT_OR_DOC | BLOCK_CHAR )* `*/`
 
-@root OUTER_LINE_DOC ->
-    `///` (~`/` ~[LF CR]*)?
+OUTER_LINE_DOC ->
+    `///` ^ LINE_DOC_COMMENT_CONTENT (LF | EOF)
 
 OUTER_BLOCK_DOC ->
-    `/**`
+    `/**` ![`*` `/`]
+      ^
       ( ~`*` | BLOCK_COMMENT_OR_DOC )
-      ( BLOCK_COMMENT_OR_DOC | ~[`*/` CR] )*
+      ( BLOCK_COMMENT_OR_DOC | BLOCK_CHAR )*
     `*/`
 
-@root BLOCK_COMMENT_OR_DOC ->
+BLOCK_CHAR -> (!(`*/` | CR) CHAR)
+
+BLOCK_COMMENT_OR_DOC ->
       BLOCK_COMMENT
     | OUTER_BLOCK_DOC
     | INNER_BLOCK_DOC
@@ -51,7 +66,7 @@ r[comments.doc.syntax]
 Line doc comments beginning with exactly _three_ slashes (`///`), and block doc comments (`/** ... */`), both outer doc comments, are interpreted as a special syntax for [`doc` attributes].
 
 r[comments.doc.attributes]
-That is, they are equivalent to writing `#[doc="..."]` around the body of the comment, i.e., `/// Foo` turns into `#[doc="Foo"]` and `/** Bar */` turns into `#[doc="Bar"]`. They must therefore appear before something that accepts an outer attribute.
+That is, they are equivalent to writing `#[doc="..."]` around the body of the comment, i.e., `/// Foo` turns into `#[doc=" Foo"]` and `/** Bar */` turns into `#[doc=" Bar "]`. They must therefore appear before something that accepts an outer attribute.
 
 r[comments.doc.inner-syntax]
 Line comments beginning with `//!` and block comments `/*! ... */` are doc comments that apply to the parent of the comment, rather than the item that follows.

diff --git a/src/identifiers.md b/src/identifiers.md
@@ -16,7 +16,7 @@ NON_KEYWORD_IDENTIFIER -> IDENTIFIER_OR_KEYWORD _except a [strict][lex.keywords.
 IDENTIFIER -> NON_KEYWORD_IDENTIFIER | RAW_IDENTIFIER
 
 RESERVED_RAW_IDENTIFIER ->
-    `r#` (`_` | `crate` | `self` | `Self` | `super`) _not immediately followed by XID_Continue_
+    `r#` (`_` | `crate` | `self` | `Self` | `super`) !XID_Continue
 ```
 
 <!-- When updating the version, update the UAX links, too. -->

diff --git a/src/input-format.md b/src/input-format.md
@@ -3,9 +3,13 @@ r[input]
 
 r[input.syntax]
 ```grammar,lexer
-@root CHAR -> <a Unicode scalar value>
+CHAR -> [U+0000-U+D7FF U+E000-U+10FFFF] // a Unicode scalar value
+
+ASCII -> [U+0000-U+007F]
 
 NUL -> U+0000
+
+EOF -> !CHAR  // End of file or input
 ```
 
 r[input.intro]

diff --git a/src/notation.md b/src/notation.md
@@ -19,6 +19,7 @@ The following notations are used by the *Lexer* and *Syntax* grammar snippets:
 | x<sup>a..b</sup>  | HEX_DIGIT<sup>1..6</sup>      | a to b repetitions of x                   |
 | Rule1 Rule2       | `fn` _Name_ _Parameters_      | Sequence of rules in order                |
 | \|                | `u8` \| `u16`, Block \| Item  | Either one or another                     |
+| !                 | !COMMENT                      | Matches if the expression does not follow, without consuming any input |
 | \[ ]               | \[`b` `B`]                     | Any of the characters listed              |
 | \[ - ]             | \[`a`-`z`]                     | Any of the characters in the range        |
 | ~\[ ]              | ~\[`b` `B`]                    | Any characters, except those listed       |

diff --git a/src/tokens.md b/src/tokens.md
@@ -115,7 +115,7 @@ r[lex.token.literal.suffix.syntax]
 ```grammar,lexer
 SUFFIX -> IDENTIFIER_OR_KEYWORD _except `_`_
 
-SUFFIX_NO_E -> SUFFIX _not beginning with `e` or `E`_
+SUFFIX_NO_E -> ![`e` `E`] SUFFIX
 ```
 
 r[lex.token.literal.suffix.validity]
@@ -253,8 +253,7 @@ r[lex.token.byte.syntax]
 BYTE_LITERAL ->
     `b'` ^ ( ASCII_FOR_CHAR | BYTE_ESCAPE )  `'` SUFFIX?
 
-ASCII_FOR_CHAR ->
-    <any ASCII (i.e. 0x00 to 0x7F) except `'`, `\`, LF, CR, or TAB>
+ASCII_FOR_CHAR -> ![`'` `\` LF CR TAB] ASCII
 
 BYTE_ESCAPE ->
       `\x` HEX_DIGIT HEX_DIGIT
@@ -272,8 +271,7 @@ r[lex.token.str-byte.syntax]
 BYTE_STRING_LITERAL ->
     `b"` ^ ( ASCII_FOR_STRING | BYTE_ESCAPE | STRING_CONTINUE )* `"` SUFFIX?
 
-ASCII_FOR_STRING ->
-    <any ASCII (i.e 0x00 to 0x7F) except `"`, `\`, or CR>
+ASCII_FOR_STRING -> ![`"` `\` CR] ASCII
 ```
 
 r[lex.token.str-byte.intro]
@@ -309,8 +307,7 @@ RAW_BYTE_STRING_CONTENT ->
       `"` ^ ASCII_FOR_RAW*? `"`
     | `#` RAW_BYTE_STRING_CONTENT `#`
 
-ASCII_FOR_RAW ->
-    <any ASCII (i.e. 0x00 to 0x7F) except CR>
+ASCII_FOR_RAW -> !CR ASCII
 ```
 
 r[lex.token.str-byte-raw.intro]
@@ -559,7 +556,7 @@ r[lex.token.literal.float.syntax]
 FLOAT_LITERAL ->
       DEC_LITERAL (`.` DEC_LITERAL)? FLOAT_EXPONENT SUFFIX?
     | DEC_LITERAL `.` DEC_LITERAL SUFFIX_NO_E?
-    | DEC_LITERAL `.` _not immediately followed by `.`, `_` or an XID_Start character_
+    | DEC_LITERAL `.` !(`.` | `_` | XID_Start)
 
 FLOAT_EXPONENT ->
     (`e`|`E`) (`+`|`-`)? `_`* DEC_DIGIT (DEC_DIGIT|`_`)*
@@ -608,13 +605,12 @@ r[lex.token.literal.reserved.syntax]
 RESERVED_NUMBER ->
       BIN_LITERAL [`2`-`9`]
     | OCT_LITERAL [`8`-`9`]
-    | ( BIN_LITERAL | OCT_LITERAL | HEX_LITERAL ) `.` _not immediately followed by `.`, `_` or an XID_Start character_
+    | ( BIN_LITERAL | OCT_LITERAL | HEX_LITERAL ) `.` !(`.` | `_` | XID_Start)
     | ( BIN_LITERAL | OCT_LITERAL ) (`e`|`E`)
-    | `0b` `_`* <end of input or not BIN_DIGIT>
-    | `0o` `_`* <end of input or not OCT_DIGIT>
-    | `0x` `_`* <end of input or not HEX_DIGIT>
+    | `0b` `_`* !BIN_DIGIT
+    | `0o` `_`* !OCT_DIGIT
+    | `0x` `_`* !HEX_DIGIT
-    | DEC_LITERAL ( `.` DEC_LITERAL )? (`e` | `E`) (`+` | `-`)? <end of input or not DEC_DIGIT>
+    | DEC_LITERAL ( `.` DEC_LITERAL )? (`e` | `E`) (`+` | `-`)? !DEC_DIGIT
-
 ```
 
 r[lex.token.literal.reserved.intro]
@@ -657,16 +653,16 @@ r[lex.token.life.syntax]
 ```grammar,lexer
 LIFETIME_TOKEN ->
       RAW_LIFETIME
-    | `'` IDENTIFIER_OR_KEYWORD _not immediately followed by `'`_
+    | `'` IDENTIFIER_OR_KEYWORD !`'`
 
 LIFETIME_OR_LABEL ->
       RAW_LIFETIME
-    | `'` NON_KEYWORD_IDENTIFIER _not immediately followed by `'`_
+    | `'` NON_KEYWORD_IDENTIFIER !`'`
 
 RAW_LIFETIME ->
-    `'r#` IDENTIFIER_OR_KEYWORD _not immediately followed by `'`_
+    `'r#` ^ IDENTIFIER_OR_KEYWORD !`'`
 
-RESERVED_RAW_LIFETIME -> `'r#` (`_` | `crate` | `self` | `Self` | `super`) _not immediately followed by `'`_
+RESERVED_RAW_LIFETIME -> `'r#` (`_` | `crate` | `self` | `Self` | `super`) !(`'` | XID_Continue)
 ```
 
 r[lex.token.life.intro]

diff --git a/tools/grammar/src/lib.rs b/tools/grammar/src/lib.rs
@@ -3,6 +3,7 @@
 use diagnostics::{Diagnostics, warn_or_err};
 use regex::Regex;
 use std::collections::{HashMap, HashSet};
+use std::fmt::{Display, Formatter};
 use std::path::{Path, PathBuf};
 use std::sync::LazyLock;
 use walkdir::WalkDir;
@@ -50,6 +51,8 @@ pub enum ExpressionKind {
     Sequence(Vec<Expression>),
     /// `A?`
     Optional(Box<Expression>),
+    /// `!A`
+    NegativeLookahead(Box<Expression>),
     /// `A*`
     Repeat(Box<Expression>),
     /// `A*?`
@@ -79,7 +82,7 @@ pub enum ExpressionKind {
     /// `^ A B C`
     Cut(Box<Expression>),
     /// `U+0060`
-    Unicode(String),
+    Unicode((char, String)),
 }
 
 #[derive(Clone, Debug)]
@@ -89,7 +92,34 @@ pub enum Characters {
     /// `` `_` ``
     Terminal(String),
     /// `` `A`-`Z` ``
-    Range(char, char),
+    Range(Character, Character),
+}
+
+#[derive(Clone, Debug)]
+pub enum Character {
+    Char(char),
+    /// `U+0060`
+    ///
+    /// The `String` is the hex digits after `U+`.
+    Unicode((char, String)),
+}
+
+impl Character {
+    pub fn get_ch(&self) -> char {
+        match self {
+            Character::Char(ch) => *ch,
+            Character::Unicode((ch, _)) => *ch,
+        }
+    }
+}
+
+impl Display for Character {
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
+        match self {
+            Character::Char(ch) => write!(f, "`{ch}`"),
+            Character::Unicode((_, s)) => write!(f, "U+{s}"),
+        }
+    }
 }
 
 impl Grammar {
@@ -113,6 +143,7 @@ impl Expression {
         match &self.kind {
             ExpressionKind::Grouped(e)
             | ExpressionKind::Optional(e)
+            | ExpressionKind::NegativeLookahead(e)
             | ExpressionKind::Repeat(e)
             | ExpressionKind::RepeatNonGreedy(e)
             | ExpressionKind::RepeatPlus(e)