From 8429ad5ecbf6d2b1d4c8e746ad7909fa3fa35378 Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Sat, 14 Feb 2026 20:14:21 -0800 Subject: [PATCH 01/12] Switch to new range syntax This changes the syntax for range repeat so that it handles inclusive and exclusive upper bounds with `..=` and `..`. The old syntax was confusing. It used `..` for inclusive bound, but that's not how Rust syntax works. This changes it so that it uses `..=` for inclusive bounds to be consistent with Rust syntax. There are some other options for range syntax that I considered: - `{a,b}` which is the syntax used by most regex engines, and some parsers like Pest and Parsimonious. - IETF ABNF and W3C EBNF use `<a>*<b>expr` where `a` and `b` are optional. - Peg-rs uses `*<n,m>` where `n` and `m` are optional. - Various languages use `:` (Python, Julia, Excel, etc.) or `..` (Rust, Kotlin, Swift, C#, F#, Zig, Perl, etc.) to represent ranges. This will become more relevant when we switch the raw string literals to use a bounded range. We can't easily avoid the use of bounded repetition because of raw-string's bound of 255. Listing out 255 variants would be just too much, and it is convenient to avoid English-descriptive rules. --- dev-guide/src/grammar.md | 36 ++++-- src/notation.md | 3 +- src/tokens.md | 2 +- tools/grammar/src/lib.rs | 30 ++++- tools/grammar/src/parser.rs | 33 +++-- .../src/grammar/render_markdown.rs | 17 ++- .../src/grammar/render_railroad.rs | 114 ++++++++++++++---- 7 files changed, 179 insertions(+), 56 deletions(-) diff --git a/dev-guide/src/grammar.md b/dev-guide/src/grammar.md index 2f8d41f822..305ee411d3 100644 --- a/dev-guide/src/grammar.md +++ b/dev-guide/src/grammar.md @@ -39,19 +39,34 @@ Sequence -> (` `* AdornedExpr)* ` `* Cut | (` `* AdornedExpr)+ -AdornedExpr -> ExprRepeat Suffix? Footnote? +AdornedExpr -> Expr1 Quantifier? Suffix? Footnote? 
Suffix -> ` _` * `_` Footnote -> `[^` ~[`]` LF]+ `]` -ExprRepeat -> - Expr1 `?` - | Expr1 `*?` - | Expr1 `*` - | Expr1 `+?` - | Expr1 `+` - | Expr1 `{` Range? `..` Range? `}` +Quantifier -> + Optional + | Repeat + | RepeatNonGreedy + | RepeatPlus + | RepeatPlusNonGreedy + | RepeatRange + | RepeatRangeInclusive + +Optional -> `?` + +Repeat -> `*` + +RepeatNonGreedy -> `*?` + +RepeatPlus -> `+` + +RepeatPlusNonGreedy -> `+?` + +RepeatRange -> `{` Range? `..` Range? `}` + +RepeatRangeInclusive -> `{` Range `..=` Range `}` Range -> [0-9]+ @@ -121,10 +136,11 @@ The general format is a series of productions separated by blank lines. The expr | Footnote | \[^extern-safe\] | Adds a footnote, which can supply extra information that may be helpful to the user. The footnote itself should be defined outside of the code block like a normal Markdown footnote. | | Optional | Expr? | The preceding expression is optional. | | Repeat | Expr* | The preceding expression is repeated 0 or more times. | -| Repeat (non-greedy) | Expr*? | The preceding expression is repeated 0 or more times without being greedy. | +| RepeatNonGreedy | Expr*? | The preceding expression is repeated 0 or more times without being greedy. | | RepeatPlus | Expr+ | The preceding expression is repeated 1 or more times. | -| RepeatPlus (non-greedy) | Expr+? | The preceding expression is repeated 1 or more times without being greedy. | +| RepeatPlusNonGreedy | Expr+? | The preceding expression is repeated 1 or more times without being greedy. | | RepeatRange | Expr{2..4} | The preceding expression is repeated between the range of times specified. Either bound can be excluded, which works just like Rust ranges. | +| RepeatRangeInclusive | Expr{2..=4} | The preceding expression is repeated between the range of times specified (inclusive). 
| ## Automatic linking diff --git a/src/notation.md b/src/notation.md index cda298a734..dcb30610e9 100644 --- a/src/notation.md +++ b/src/notation.md @@ -16,7 +16,8 @@ The following notations are used by the *Lexer* and *Syntax* grammar snippets: | x? | `pub`? | An optional item | | x\* | _OuterAttribute_\* | 0 or more of x | | x+ | _MacroMatch_+ | 1 or more of x | -| xa..b | HEX_DIGIT1..6 | a to b repetitions of x | +| xa..b | HEX_DIGIT1..6 | a to b repetitions (exclusive) of x | +| xa..=b | HEX_DIGIT1..=6 | a to b repetitions (inclusive) of x | | Rule1 Rule2 | `fn` _Name_ _Parameters_ | Sequence of rules in order | | \| | `u8` \| `u16`, Block \| Item | Either one or another | | \[ ] | \[`b` `B`] | Any of the characters listed | diff --git a/src/tokens.md b/src/tokens.md index b6a0124320..047afd76a6 100644 --- a/src/tokens.md +++ b/src/tokens.md @@ -157,7 +157,7 @@ ASCII_ESCAPE -> | `\n` | `\r` | `\t` | `\\` | `\0` UNICODE_ESCAPE -> - `\u{` ( HEX_DIGIT `_`* ){1..6} _valid hex char value_ `}`[^valid-hex-char] + `\u{` ( HEX_DIGIT `_`* ){1..=6} _valid hex char value_ `}`[^valid-hex-char] ``` [^valid-hex-char]: See [lex.token.literal.char-escape.unicode]. 
diff --git a/tools/grammar/src/lib.rs b/tools/grammar/src/lib.rs index 70e1a8f9a8..6fbb886558 100644 --- a/tools/grammar/src/lib.rs +++ b/tools/grammar/src/lib.rs @@ -3,6 +3,7 @@ use diagnostics::{Diagnostics, warn_or_err}; use regex::Regex; use std::collections::{HashMap, HashSet}; +use std::fmt::{Display, Formatter}; use std::path::{Path, PathBuf}; use std::sync::LazyLock; use walkdir::WalkDir; @@ -58,8 +59,13 @@ pub enum ExpressionKind { RepeatPlus(Box), /// `A+?` RepeatPlusNonGreedy(Box), - /// `A{2..4}` - RepeatRange(Box, Option, Option), + /// `A{2..4}` or `A{2..=4}` + RepeatRange { + expr: Box, + min: Option, + max: Option, + limit: RangeLimit, + }, /// `NonTerminal` Nt(String), /// `` `string` `` @@ -82,6 +88,24 @@ pub enum ExpressionKind { Unicode(String), } +#[derive(Copy, Clone, Debug)] +pub enum RangeLimit { + /// `..` + HalfOpen, + /// `..=` + Closed, +} + +impl Display for RangeLimit { + fn fmt(&self, f: &mut Formatter<'_>) -> std::result::Result<(), std::fmt::Error> { + match self { + RangeLimit::HalfOpen => "..", + RangeLimit::Closed => "..=", + } + .fmt(f) + } +} + #[derive(Clone, Debug)] pub enum Characters { /// `LF` @@ -117,7 +141,7 @@ impl Expression { | ExpressionKind::RepeatNonGreedy(e) | ExpressionKind::RepeatPlus(e) | ExpressionKind::RepeatPlusNonGreedy(e) - | ExpressionKind::RepeatRange(e, _, _) + | ExpressionKind::RepeatRange { expr: e, .. } | ExpressionKind::NegExpression(e) | ExpressionKind::Cut(e) => { e.visit_nt(callback); diff --git a/tools/grammar/src/parser.rs b/tools/grammar/src/parser.rs index d4240ae4d7..7e8c29aaaf 100644 --- a/tools/grammar/src/parser.rs +++ b/tools/grammar/src/parser.rs @@ -1,6 +1,6 @@ //! A parser of the ENBF-like grammar. 
-use super::{Characters, Expression, ExpressionKind, Grammar, Production}; +use super::{Characters, Expression, ExpressionKind, Grammar, Production, RangeLimit}; use std::fmt; use std::fmt::Display; use std::path::Path; @@ -428,24 +428,37 @@ impl Parser<'_> { }) } - /// Parse `{a..}` | `{..b}` | `{a..b}` after expression. + /// Parse `{a..b}` | `{a..=b}` after expression. fn parse_repeat_range(&mut self, kind: ExpressionKind) -> Result { self.expect("{", "expected `{`")?; - let a = self.take_while(&|x| x.is_ascii_digit()); - let Ok(a) = (!a.is_empty()).then(|| a.parse::()).transpose() else { + let min = self.take_while(&|x| x.is_ascii_digit()); + let Ok(min) = (!min.is_empty()).then(|| min.parse::()).transpose() else { bail!(self, "malformed range start"); }; - self.expect("..", "expected `..`")?; - let b = self.take_while(&|x| x.is_ascii_digit()); - let Ok(b) = (!b.is_empty()).then(|| b.parse::()).transpose() else { + self.expect("..", "expected `..` or `..=`")?; + let limit = if self.take_str("=") { + RangeLimit::Closed + } else { + RangeLimit::HalfOpen + }; + let max = self.take_while(&|x| x.is_ascii_digit()); + let Ok(max) = (!max.is_empty()).then(|| max.parse::()).transpose() else { bail!(self, "malformed range end"); }; - match (a, b) { - (Some(a), Some(b)) if b < a => bail!(self, "range {a}..{b} is malformed"), + match (min, max, limit) { + (Some(min), Some(max), _) if max < min => { + bail!(self, "range {min}{limit}{max} is malformed") + } + (_, None, RangeLimit::Closed) => bail!(self, "closed range must have an upper bound"), _ => {} } self.expect("}", "expected `}`")?; - Ok(ExpressionKind::RepeatRange(box_kind(kind), a, b)) + Ok(ExpressionKind::RepeatRange { + expr: box_kind(kind), + min, + max, + limit, + }) } fn parse_suffix(&mut self) -> Result> { diff --git a/tools/mdbook-spec/src/grammar/render_markdown.rs b/tools/mdbook-spec/src/grammar/render_markdown.rs index a5540b4169..edec8da035 100644 --- a/tools/mdbook-spec/src/grammar/render_markdown.rs +++ 
b/tools/mdbook-spec/src/grammar/render_markdown.rs @@ -71,7 +71,7 @@ fn last_expr(expr: &Expression) -> &ExpressionKind { | ExpressionKind::RepeatNonGreedy(_) | ExpressionKind::RepeatPlus(_) | ExpressionKind::RepeatPlusNonGreedy(_) - | ExpressionKind::RepeatRange(_, _, _) + | ExpressionKind::RepeatRange { .. } | ExpressionKind::Nt(_) | ExpressionKind::Terminal(_) | ExpressionKind::Prose(_) @@ -135,13 +135,18 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, output: &mut String) { render_expression(e, cx, output); output.push_str("+ (non-greedy)"); } - ExpressionKind::RepeatRange(e, a, b) => { - render_expression(e, cx, output); + ExpressionKind::RepeatRange { + expr, + min, + max, + limit, + } => { + render_expression(expr, cx, output); write!( output, - "{}..{}", - a.map(|v| v.to_string()).unwrap_or_default(), - b.map(|v| v.to_string()).unwrap_or_default(), + "{min}{limit}{max}", + min = min.map(|v| v.to_string()).unwrap_or_default(), + max = max.map(|v| v.to_string()).unwrap_or_default(), ) .unwrap(); } diff --git a/tools/mdbook-spec/src/grammar/render_railroad.rs b/tools/mdbook-spec/src/grammar/render_railroad.rs index 6efb065a34..402abe2fc8 100644 --- a/tools/mdbook-spec/src/grammar/render_railroad.rs +++ b/tools/mdbook-spec/src/grammar/render_railroad.rs @@ -3,7 +3,7 @@ use super::RenderCtx; use crate::grammar::Grammar; use anyhow::bail; -use grammar::{Characters, Expression, ExpressionKind, Production}; +use grammar::{Characters, Expression, ExpressionKind, Production, RangeLimit}; use railroad::*; use regex::Regex; use std::fmt::Write; @@ -78,9 +78,13 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option { - render_expression(e, cx, stack)? 
- } + ExpressionKind::Grouped(e) + | ExpressionKind::RepeatRange { + expr: e, + min: Some(1), + max: Some(1), + limit: RangeLimit::Closed, + } => render_expression(e, cx, stack)?, ExpressionKind::Alt(es) => { let choices: Vec<_> = es .iter() @@ -139,15 +143,25 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option { + | ExpressionKind::RepeatRange { + expr: e, + min: None | Some(0), + max: Some(1), + limit: RangeLimit::Closed, + } => { let n = render_expression(e, cx, stack)?; Box::new(Optional::new(n)) } // Treat `e*` and `e{..}` / `e{0..}` equally. ExpressionKind::Repeat(e) - | ExpressionKind::RepeatRange(e, None | Some(0), None) => { + | ExpressionKind::RepeatRange { + expr: e, + min: None | Some(0), + max: None, + limit: RangeLimit::HalfOpen, + } => { let n = render_expression(e, cx, stack)?; Box::new(Optional::new(Repeat::new(n, railroad::Empty))) } @@ -158,7 +172,14 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option { + ExpressionKind::RepeatPlus(e) + | ExpressionKind::RepeatRange { + expr: e, + min: Some(1), + max: None, + limit: RangeLimit::HalfOpen, + .. + } => { let n = render_expression(e, cx, stack)?; Box::new(Repeat::new(n, railroad::Empty)) } @@ -168,38 +189,81 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option Box::new(railroad::Empty), - // Treat `e{..b}` / `e{0..b}` as `(e{1..b})?`. - ExpressionKind::RepeatRange(e, None | Some(0), Some(b @ 2..)) => { + // For `e{a..=0}` or `e{a..0}` or `e{..1}` render an empty node. + ExpressionKind::RepeatRange { max: Some(0), .. } + | ExpressionKind::RepeatRange { + max: Some(1), + limit: RangeLimit::HalfOpen, + .. + } => Box::new(railroad::Empty), + // Treat `e{..=b}` / `e{0..=b}` as `(e{1..=b})?`. 
+ ExpressionKind::RepeatRange { + expr: e, + min: None | Some(0), + max: Some(b @ 2..), + limit, + } => { state = ExpressionKind::Optional(Box::new(Expression::new_kind( - ExpressionKind::RepeatRange(e.clone(), Some(1), Some(*b)), + ExpressionKind::RepeatRange { + expr: e.clone(), + min: Some(1), + max: Some(*b), + limit: *limit, + }, ))); break 'cont &state; } - // Render `e{1..b}` directly. - ExpressionKind::RepeatRange(e, Some(1), Some(b @ 2..)) => { + // Render `e{1..=b}` directly. + ExpressionKind::RepeatRange { + expr: e, + min: Some(1), + max: Some(b @ 2..), + limit, + } => { let n = render_expression(e, cx, stack)?; - let cmt = format!("at most {b} more times", b = b - 1); + let more = match limit { + RangeLimit::HalfOpen => b - 2, + RangeLimit::Closed => b - 1, + }; + let cmt = format!("at most {more} more times"); let r = Repeat::new(n, Comment::new(cmt)); Box::new(r) } - // Treat `e{a..}` as `e{a-1..a-1} e{1..}` and `e{a..b}` as - // `e{a-1..a-1} e{1..b-(a-1)}`, and treat `e{x..x}` for some - // `x` as a sequence of `e` nodes of length `x`. 
- ExpressionKind::RepeatRange(e, Some(a @ 2..), b) => { + // Treat: + // - `e{a..}` as `e{0..a-1} e{1..}` + // - `e{a..=b}` as `e{0..a-1} e{1..=b-(a-1)}` + // - `e{a..b} as `e{0..a-1} {e..b-(a-1)}` + // - `e{x..=x}` for some `x` as a sequence of `e` nodes of length `x` + ExpressionKind::RepeatRange { + expr: e, + min: Some(a @ 2..), + max: b @ None, + limit, + } + | ExpressionKind::RepeatRange { + expr: e, + min: Some(a @ 2..), + max: b @ Some(_), + limit, + } => { let mut es = Vec::::new(); for _ in 0..(a - 1) { es.push(*e.clone()); } - es.push(Expression::new_kind(ExpressionKind::RepeatRange( - e.clone(), - Some(1), - b.map(|x| x - (a - 1)), - ))); + es.push(Expression::new_kind(ExpressionKind::RepeatRange { + expr: e.clone(), + min: Some(1), + max: b.map(|x| x - (a - 1)), + limit: *limit, + })); state = ExpressionKind::Sequence(es); break 'cont &state; } + ExpressionKind::RepeatRange { + max: None, + limit: RangeLimit::Closed, + .. + } => unreachable!("closed range must have upper bound"), ExpressionKind::Nt(nt) => node_for_nt(cx, nt), ExpressionKind::Terminal(t) => Box::new(Terminal::new(t.clone())), ExpressionKind::Prose(s) => Box::new(Terminal::new(s.clone())), From d1467b4f77c232c8f59f336dd0608b4419c283ee Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 18 Feb 2026 00:06:11 +0000 Subject: [PATCH 02/12] Fix range in Unicode production to use `..=` Under the new exclusive-range semantics, `4..4` is an empty range -- it matches zero characters. The intent is to match exactly four characters, so we need the inclusive form `4..=4`. 
--- dev-guide/src/grammar.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-guide/src/grammar.md b/dev-guide/src/grammar.md index 305ee411d3..8e8b9c54c7 100644 --- a/dev-guide/src/grammar.md +++ b/dev-guide/src/grammar.md @@ -81,7 +81,7 @@ Expr1 -> | Group | NegativeExpression -Unicode -> `U+` [`A`-`Z` `0`-`9`]4..4 +Unicode -> `U+` [`A`-`Z` `0`-`9`]4..=4 NonTerminal -> Name From 5aa878986ff5301735ccc2869a62f84382b039a8 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 18 Feb 2026 00:06:29 +0000 Subject: [PATCH 03/12] Reject empty exclusive ranges in parser An exclusive range such as `{2..2}` is empty (it matches zero repetitions), and `{3..2}` doesn't make sense. The existing validation catches the case where `max < min`, but it doesn't catch `max == min` for half-open ranges, which is equally degenerate. We now reject `b <= a` when the range limit is `HalfOpen`. The closed-range check is unchanged: `{2..=2}` (exactly two repetitions) remains valid. --- tools/grammar/src/parser.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/grammar/src/parser.rs b/tools/grammar/src/parser.rs index 7e8c29aaaf..04bae58af7 100644 --- a/tools/grammar/src/parser.rs +++ b/tools/grammar/src/parser.rs @@ -449,6 +449,9 @@ impl Parser<'_> { (Some(min), Some(max), _) if max < min => { bail!(self, "range {min}{limit}{max} is malformed") } + (Some(min), Some(max), RangeLimit::HalfOpen) if max <= min => { + bail!(self, "half-open range maximum must be greater than minimum") + } (_, None, RangeLimit::Closed) => bail!(self, "closed range must have an upper bound"), _ => {} } From 7ee8ee2b39151cab55c55586797a7f76035a8cc2 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 18 Feb 2026 00:06:44 +0000 Subject: [PATCH 04/12] Allow optional lower bound in `RepeatRangeInclusive` The parser already accepts `{..=b}` (no lower bound) for inclusive ranges, but the grammar production requires both bounds. 
We update the production to use `Range?` for the lower bound, matching what the parser actually accepts. The forms `{..=}` and `{a..=}` remain correctly rejected by the parser (a closed range must have an upper bound). We also update the description table to note that the lower bound can be omitted. --- dev-guide/src/grammar.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev-guide/src/grammar.md b/dev-guide/src/grammar.md index 8e8b9c54c7..40e4883096 100644 --- a/dev-guide/src/grammar.md +++ b/dev-guide/src/grammar.md @@ -66,7 +66,7 @@ RepeatPlusNonGreedy -> `+?` RepeatRange -> `{` Range? `..` Range? `}` -RepeatRangeInclusive -> `{` Range `..=` Range `}` +RepeatRangeInclusive -> `{` Range? `..=` Range `}` Range -> [0-9]+ @@ -140,7 +140,7 @@ The general format is a series of productions separated by blank lines. The expr | RepeatPlus | Expr+ | The preceding expression is repeated 1 or more times. | | RepeatPlusNonGreedy | Expr+? | The preceding expression is repeated 1 or more times without being greedy. | | RepeatRange | Expr{2..4} | The preceding expression is repeated between the range of times specified. Either bound can be excluded, which works just like Rust ranges. | -| RepeatRangeInclusive | Expr{2..=4} | The preceding expression is repeated between the range of times specified (inclusive). | +| RepeatRangeInclusive | Expr{2..=4} | The preceding expression is repeated between the inclusive range of times specified. The lower bound can be omitted. | ## Automatic linking From f411f3351d8a19a6dcca933b50b2ab3321d317fe Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 18 Feb 2026 00:07:29 +0000 Subject: [PATCH 05/12] Improve comments in `render_railroad.rs` Several comments in the `RepeatRange` match block are inaccurate or incomplete after the switch to the new range syntax. The decomposition comment for the `min >= 2` arm has a missing backtick and an incorrect formula. 
We fix the backtick and correct the decomposition for `e{a..b}` from `{e..b-(a-1)}` to `e{1..b-(a-1)}`. We also drop the separate `e{x..=x}` bullet, which is just a special case of the `e{a..=b}` decomposition. The empty-node comment lists `e{a..=0}` and `e{a..0}`, which the parser now rejects when `a > 0` (an earlier commit rejects empty exclusive ranges, and the existing validation rejects `max < min`). We update the comment to list the actually-reachable cases: `e{..=0}`, `e{0..=0}`, `e{..0}`, `e{..1}`, and `e{0..1}`. We also update comments on the other arms to mention both the half-open and closed forms they match. --- tools/mdbook-spec/src/grammar/render_railroad.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/mdbook-spec/src/grammar/render_railroad.rs b/tools/mdbook-spec/src/grammar/render_railroad.rs index 402abe2fc8..9047566f58 100644 --- a/tools/mdbook-spec/src/grammar/render_railroad.rs +++ b/tools/mdbook-spec/src/grammar/render_railroad.rs @@ -189,14 +189,15 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option Box::new(railroad::Empty), - // Treat `e{..=b}` / `e{0..=b}` as `(e{1..=b})?`. + // Treat `e{..b}` / `e{0..b}` / `e{..=b}` / `e{0..=b}` as + // `(e{1..=b})?` (or `(e{1..b})?` for half-open). 
ExpressionKind::RepeatRange { expr: e, min: None | Some(0), @@ -213,7 +214,7 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option Option= 2 into a fixed prefix + // and a remainder: // - `e{a..}` as `e{0..a-1} e{1..}` // - `e{a..=b}` as `e{0..a-1} e{1..=b-(a-1)}` - // - `e{a..b} as `e{0..a-1} {e..b-(a-1)}` - // - `e{x..=x}` for some `x` as a sequence of `e` nodes of length `x` + // - `e{a..b}` as `e{0..a-1} e{1..b-(a-1)}` ExpressionKind::RepeatRange { expr: e, min: Some(a @ 2..), From a1d551f3eac501881d632d8978a70b2deb8d5b1d Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 18 Feb 2026 00:07:43 +0000 Subject: [PATCH 06/12] Improve phrasing of range descriptions The parenthetical "(exclusive)" and "(inclusive)" interrupt the noun phrase "a to b repetitions of x" awkwardly, leaving the reader to work out which bound is being qualified. We rephrase to "a to b repetitions of x, exclusive of b" and "a to b repetitions of x, inclusive of b", which reads more naturally and makes the referent of the qualifier unambiguous. --- src/notation.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/notation.md b/src/notation.md index dcb30610e9..d3ad8362cc 100644 --- a/src/notation.md +++ b/src/notation.md @@ -16,8 +16,8 @@ The following notations are used by the *Lexer* and *Syntax* grammar snippets: | x? | `pub`? 
| An optional item | | x\* | _OuterAttribute_\* | 0 or more of x | | x+ | _MacroMatch_+ | 1 or more of x | -| xa..b | HEX_DIGIT1..6 | a to b repetitions (exclusive) of x | -| xa..=b | HEX_DIGIT1..=6 | a to b repetitions (inclusive) of x | +| xa..b | HEX_DIGIT1..6 | a to b repetitions of x, exclusive of b | +| xa..=b | HEX_DIGIT1..=6 | a to b repetitions of x, inclusive of b | | Rule1 Rule2 | `fn` _Name_ _Parameters_ | Sequence of rules in order | | \| | `u8` \| `u16`, Block \| Item | Either one or another | | \[ ] | \[`b` `B`] | Any of the characters listed | From acb0eb5dd564ebc3077a74e29cda592e95576e3c Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 18 Feb 2026 00:07:55 +0000 Subject: [PATCH 07/12] Remove redundant `..` in `RepeatRange` pattern All four fields of the `RepeatRange` variant (`expr`, `min`, `max`, `limit`) are already named in this pattern, so the trailing `..` is redundant. --- tools/mdbook-spec/src/grammar/render_railroad.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/mdbook-spec/src/grammar/render_railroad.rs b/tools/mdbook-spec/src/grammar/render_railroad.rs index 9047566f58..3aee63c872 100644 --- a/tools/mdbook-spec/src/grammar/render_railroad.rs +++ b/tools/mdbook-spec/src/grammar/render_railroad.rs @@ -178,7 +178,6 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option { let n = render_expression(e, cx, stack)?; Box::new(Repeat::new(n, railroad::Empty)) From 49f85014e80542db30adcf0ec7e13f6c6ffea2b3 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 18 Feb 2026 00:10:40 +0000 Subject: [PATCH 08/12] Handle empty exclusive ranges in renderer Even though the parser now rejects `{a..a}` half-open ranges, the renderer should handle them correctly on principle. A half-open range where `min >= max` is empty -- it specifies zero repetitions -- and should render as an empty node rather than falling through to the decomposition arm, which would incorrectly produce copies of the expression. 
--- tools/mdbook-spec/src/grammar/render_railroad.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/mdbook-spec/src/grammar/render_railroad.rs b/tools/mdbook-spec/src/grammar/render_railroad.rs index 3aee63c872..e5789e5a3a 100644 --- a/tools/mdbook-spec/src/grammar/render_railroad.rs +++ b/tools/mdbook-spec/src/grammar/render_railroad.rs @@ -229,6 +229,15 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option= max is empty (e.g., + // `e{2..2}` means zero repetitions). + ExpressionKind::RepeatRange { + min: Some(a), + max: Some(b), + limit: RangeLimit::HalfOpen, + .. + } if b <= a => Box::new(railroad::Empty), + // Decompose ranges with min >= 2 into a fixed prefix // and a remainder: // - `e{a..}` as `e{0..a-1} e{1..}` From 3ea103985a9a77cf8b5199f32f59f18d327bb4ad Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 18 Feb 2026 00:22:54 +0000 Subject: [PATCH 09/12] Add parser tests for range validation The parser validates several invariants on repeat ranges: half-open ranges must satisfy `max > min`, closed ranges need an explicit upper bound, and malformed ranges (`max < min`) are rejected outright. We had no test coverage for any of these checks. Let's add tests for the full matrix of valid range forms (half-open, closed, with and without bounds) as well as the error paths (`max < min`, empty exclusive ranges like `x{2..2}` and `x{0..0}`, and closed ranges missing an upper bound). We also cover the edge cases `x{2..=2}` (exactly two, via closed range) and `x{0..1}` (exactly zero, via half-open range). 
--- tools/grammar/src/parser.rs | 134 +++++++++++++++++++++++++++++++++++- 1 file changed, 133 insertions(+), 1 deletion(-) diff --git a/tools/grammar/src/parser.rs b/tools/grammar/src/parser.rs index 04bae58af7..85e4310907 100644 --- a/tools/grammar/src/parser.rs +++ b/tools/grammar/src/parser.rs @@ -539,7 +539,7 @@ fn translate_position(input: &str, index: usize) -> (&str, usize, usize) { #[cfg(test)] mod tests { use crate::parser::{parse_grammar, translate_position}; - use crate::{ExpressionKind, Grammar}; + use crate::{ExpressionKind, Grammar, RangeLimit}; use std::path::Path; #[test] @@ -603,4 +603,136 @@ mod tests { let err = parse(input).unwrap_err(); assert!(err.contains("expected expression after cut operator")); } + + /// Extract the `RepeatRange` fields from a single-production + /// grammar whose rule body is a repeat-range expression. + fn repeat_range(input: &str) -> (Option, Option, RangeLimit) { + let grammar = parse(input).unwrap(); + let rule = grammar.productions.get("A").unwrap(); + let ExpressionKind::RepeatRange { + min, max, limit, .. 
+ } = &rule.expression.kind + else { + panic!("expected RepeatRange, got {:?}", rule.expression.kind); + }; + (*min, *max, *limit) + } + + // -- Valid ranges ----------------------------------------------- + + #[test] + fn test_range_half_open() { + let (min, max, limit) = repeat_range("A -> x{2..5}"); + assert_eq!(min, Some(2)); + assert_eq!(max, Some(5)); + assert!(matches!(limit, RangeLimit::HalfOpen)); + } + + #[test] + fn test_range_half_open_no_min() { + let (min, max, limit) = repeat_range("A -> x{..5}"); + assert_eq!(min, None); + assert_eq!(max, Some(5)); + assert!(matches!(limit, RangeLimit::HalfOpen)); + } + + #[test] + fn test_range_half_open_no_max() { + let (min, max, limit) = repeat_range("A -> x{2..}"); + assert_eq!(min, Some(2)); + assert_eq!(max, None); + assert!(matches!(limit, RangeLimit::HalfOpen)); + } + + #[test] + fn test_range_half_open_unbounded() { + let (min, max, limit) = repeat_range("A -> x{..}"); + assert_eq!(min, None); + assert_eq!(max, None); + assert!(matches!(limit, RangeLimit::HalfOpen)); + } + + #[test] + fn test_range_closed() { + let (min, max, limit) = repeat_range("A -> x{2..=5}"); + assert_eq!(min, Some(2)); + assert_eq!(max, Some(5)); + assert!(matches!(limit, RangeLimit::Closed)); + } + + #[test] + fn test_range_closed_no_min() { + let (min, max, limit) = repeat_range("A -> x{..=5}"); + assert_eq!(min, None); + assert_eq!(max, Some(5)); + assert!(matches!(limit, RangeLimit::Closed)); + } + + // -- Invalid ranges --------------------------------------------- + + #[test] + fn test_range_err_max_less_than_min() { + let err = parse("A -> x{3..2}").unwrap_err(); + assert!( + err.contains("malformed"), + "expected malformed error, got: {err}" + ); + } + + #[test] + fn test_range_err_empty_exclusive_equal() { + let err = parse("A -> x{2..2}").unwrap_err(); + assert!( + err.contains("half-open range maximum must be greater"), + "expected empty-exclusive error, got: {err}" + ); + } + + #[test] + fn 
test_range_err_empty_exclusive_zero() { + let err = parse("A -> x{0..0}").unwrap_err(); + assert!( + err.contains("half-open range maximum must be greater"), + "expected empty-exclusive error, got: {err}" + ); + } + + #[test] + fn test_range_err_closed_no_upper() { + let err = parse("A -> x{..=}").unwrap_err(); + assert!( + err.contains("closed range must have an upper bound"), + "expected closed-needs-upper error, got: {err}" + ); + } + + #[test] + fn test_range_err_closed_no_upper_with_min() { + let err = parse("A -> x{2..=}").unwrap_err(); + assert!( + err.contains("closed range must have an upper bound"), + "expected closed-needs-upper error, got: {err}" + ); + } + + // -- Valid edge cases ------------------------------------------- + + #[test] + fn test_range_closed_exact() { + // `x{2..=2}` means exactly 2 — not empty. + let (min, max, limit) = repeat_range("A -> x{2..=2}"); + assert_eq!(min, Some(2)); + assert_eq!(max, Some(2)); + assert!(matches!(limit, RangeLimit::Closed)); + } + + #[test] + fn test_range_half_open_zero_to_one() { + // `x{0..1}` means exactly 0 repetitions (the half-open + // range contains only 0). + let (min, max, limit) = repeat_range("A -> x{0..1}"); + assert_eq!(min, Some(0)); + assert_eq!(max, Some(1)); + assert!(matches!(limit, RangeLimit::HalfOpen)); + } } From a1fe2a43990c8762f30761fce3e1e5565f935b7f Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 18 Feb 2026 00:25:23 +0000 Subject: [PATCH 10/12] Add renderer tests for range edge cases The range rendering logic in `render_railroad.rs` handles several tricky edge cases -- empty half-open ranges, exact closed ranges, decomposition of multi-element ranges -- but had no test coverage. We add a `for_test()` constructor on `RenderCtx` so that the renderer's internal `render_expression` function can be called from tests without needing a full `Chapter` and link-map setup. 
We then add five tests that construct `RepeatRange` expressions directly and verify the SVG output: - `e{2..2}` and `e{3..1}` both render as empty nodes (defense in depth against the parser's own rejection). - `e{1..=1}` renders as a single nonterminal with no repeat comment. - `e{2..=4}` renders with nonterminal content and a "more times" repeat comment. - `e{..=1}` renders as an optional containing the nonterminal. --- tools/mdbook-spec/src/grammar.rs | 11 +++ .../src/grammar/render_railroad.rs | 90 +++++++++++++++++++ 2 files changed, 101 insertions(+) diff --git a/tools/mdbook-spec/src/grammar.rs b/tools/mdbook-spec/src/grammar.rs index 576accd2d6..f0a4e3fe12 100644 --- a/tools/mdbook-spec/src/grammar.rs +++ b/tools/mdbook-spec/src/grammar.rs @@ -21,6 +21,17 @@ pub struct RenderCtx { for_summary: bool, } +#[cfg(test)] +impl RenderCtx { + pub(crate) fn for_test() -> Self { + RenderCtx { + md_link_map: HashMap::new(), + rr_link_map: HashMap::new(), + for_summary: false, + } + } +} + /// Replaces the text grammar in the given chapter with the rendered version. pub fn insert_grammar(grammar: &Grammar, chapter: &Chapter, diag: &mut Diagnostics) -> String { let link_map = make_relative_link_map(grammar, chapter); diff --git a/tools/mdbook-spec/src/grammar/render_railroad.rs b/tools/mdbook-spec/src/grammar/render_railroad.rs index e5789e5a3a..c82136af71 100644 --- a/tools/mdbook-spec/src/grammar/render_railroad.rs +++ b/tools/mdbook-spec/src/grammar/render_railroad.rs @@ -371,3 +371,93 @@ impl Node for Except { self.inner.draw(x, y, h_dir) } } + +#[cfg(test)] +mod tests { + use super::*; + use grammar::{Expression, ExpressionKind, RangeLimit}; + + /// Render an expression to an SVG string fragment. 
+ fn render_to_svg(expr: &Expression) -> Option { + let cx = RenderCtx::for_test(); + let node = render_expression(expr, &cx, false)?; + let svg = node.draw(0, 0, svg::HDir::LTR); + Some(svg.to_string()) + } + + /// Build a `RepeatRange` expression wrapping a nonterminal `e`. + fn range_expr(min: Option, max: Option, limit: RangeLimit) -> Expression { + Expression::new_kind(ExpressionKind::RepeatRange { + expr: Box::new(Expression::new_kind(ExpressionKind::Nt("e".to_string()))), + min, + max, + limit, + }) + } + + #[test] + fn test_empty_exclusive_equal() { + // `e{2..2}` (half-open, min == max) renders as empty. + let expr = range_expr(Some(2), Some(2), RangeLimit::HalfOpen); + let svg = render_to_svg(&expr).unwrap(); + // An empty node produces a minimal SVG path with no + // nonterminal content. + assert!( + !svg.contains("nonterminal"), + "expected empty rendering for e{{2..2}}, got: {svg}" + ); + } + + #[test] + fn test_empty_inverted() { + // `e{3..1}` (half-open, max < min) renders as empty. + let expr = range_expr(Some(3), Some(1), RangeLimit::HalfOpen); + let svg = render_to_svg(&expr).unwrap(); + assert!( + !svg.contains("nonterminal"), + "expected empty rendering for e{{3..1}}, got: {svg}" + ); + } + + #[test] + fn test_closed_exact_one() { + // `e{1..=1}` renders as a single `e` (no repeat). + let expr = range_expr(Some(1), Some(1), RangeLimit::Closed); + let svg = render_to_svg(&expr).unwrap(); + assert!( + svg.contains("nonterminal"), + "expected nonterminal for e{{1..=1}}, got: {svg}" + ); + // Should not contain "more times" (no repeat comment). + assert!( + !svg.contains("more times"), + "e{{1..=1}} should not show a repeat comment" + ); + } + + #[test] + fn test_closed_range() { + // `e{2..=4}` renders with repeat indicators. 
+ let expr = range_expr(Some(2), Some(4), RangeLimit::Closed); + let svg = render_to_svg(&expr).unwrap(); + assert!( + svg.contains("nonterminal"), + "expected nonterminal for e{{2..=4}}, got: {svg}" + ); + assert!( + svg.contains("more times"), + "e{{2..=4}} should show a repeat comment" + ); + } + + #[test] + fn test_closed_optional() { + // `e{..=1}` renders as optional. + let expr = range_expr(None, Some(1), RangeLimit::Closed); + let svg = render_to_svg(&expr).unwrap(); + assert!( + svg.contains("nonterminal"), + "expected nonterminal for e{{..=1}}, got: {svg}" + ); + } +} From a254e51c593ca558d2ffd637d2673d4051837b11 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 18 Feb 2026 00:29:05 +0000 Subject: [PATCH 11/12] Reject half-open range with zero maximum The half-open range `{..0}` means zero to less-than-zero repetitions, which is empty. An earlier commit rejects `{a..a}` when both bounds are present, but `{..0}` (with no lower bound) slipped through because the validation requires `Some(min)`. Let's close this gap. 
--- tools/grammar/src/parser.rs | 12 ++++++++++++ tools/mdbook-spec/src/grammar/render_railroad.rs | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/tools/grammar/src/parser.rs b/tools/grammar/src/parser.rs index 85e4310907..f65cb80f97 100644 --- a/tools/grammar/src/parser.rs +++ b/tools/grammar/src/parser.rs @@ -452,6 +452,9 @@ impl Parser<'_> { (Some(min), Some(max), RangeLimit::HalfOpen) if max <= min => { bail!(self, "half-open range maximum must be greater than minimum") } + (None, Some(0), RangeLimit::HalfOpen) => { + bail!(self, "half-open range `..0` is empty") + } (_, None, RangeLimit::Closed) => bail!(self, "closed range must have an upper bound"), _ => {} } @@ -715,6 +718,15 @@ mod tests { ); } + #[test] + fn test_range_err_half_open_zero_max() { + let err = parse("A -> x{..0}").unwrap_err(); + assert!( + err.contains("half-open range `..0` is empty"), + "expected half-open-zero error, got: {err}" + ); + } + // -- Valid edge cases ------------------------------------------- #[test] diff --git a/tools/mdbook-spec/src/grammar/render_railroad.rs b/tools/mdbook-spec/src/grammar/render_railroad.rs index c82136af71..ebb20af1bc 100644 --- a/tools/mdbook-spec/src/grammar/render_railroad.rs +++ b/tools/mdbook-spec/src/grammar/render_railroad.rs @@ -188,7 +188,7 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option Date: Wed, 18 Feb 2026 00:57:26 +0000 Subject: [PATCH 12/12] Use equivalent range in inclusive example The inclusive example row in the notation table uses `HEX_DIGIT1..=6`, but the exclusive row already uses `HEX_DIGIT1..6`. Let's change the inclusive example to `1..=5` so that both rows describe the same range (1 through 5), making the equivalence between `1..6` and `1..=5` immediately visible. 
--- src/notation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/notation.md b/src/notation.md index d3ad8362cc..850ee9fb5e 100644 --- a/src/notation.md +++ b/src/notation.md @@ -17,7 +17,7 @@ The following notations are used by the *Lexer* and *Syntax* grammar snippets: | x<sup>\*</sup> | _OuterAttribute_<sup>\*</sup> | 0 or more of x | | x<sup>+</sup> | _MacroMatch_<sup>+</sup> | 1 or more of x | | x<sup>a..b</sup> | HEX_DIGIT<sup>1..6</sup> | a to b repetitions of x, exclusive of b | -| x<sup>a..=b</sup> | HEX_DIGIT<sup>1..=6</sup> | a to b repetitions of x, inclusive of b | +| x<sup>a..=b</sup> | HEX_DIGIT<sup>1..=5</sup> | a to b repetitions of x, inclusive of b | | Rule1 Rule2 | `fn` _Name_ _Parameters_ | Sequence of rules in order | | \| | `u8` \| `u16`, Block \| Item | Either one or another | | \[ ] | \[`b` `B`] | Any of the characters listed |