From ceb7b8345abe178a95d4ed03ef2dfe82c8c7d29c Mon Sep 17 00:00:00 2001 From: matt rice Date: Tue, 3 Mar 2026 06:40:27 -0800 Subject: [PATCH] Add span information for action code, improve errors. This adds span information for Rust action code in Yacc files to the AST. This subsequently gets added to `YaccGrammar` and is then used for an error message in `CTParserBuilder`. --- cfgrammar/src/lib/yacc/ast.rs | 4 ++-- cfgrammar/src/lib/yacc/grammar.rs | 13 ++++++++++++- cfgrammar/src/lib/yacc/parser.rs | 14 ++++++++++---- lrpar/src/lib/ctbuilder.rs | 26 +++++++++++++++++++------- 4 files changed, 43 insertions(+), 14 deletions(-) diff --git a/cfgrammar/src/lib/yacc/ast.rs b/cfgrammar/src/lib/yacc/ast.rs index 8672b3b71..32e96d255 100644 --- a/cfgrammar/src/lib/yacc/ast.rs +++ b/cfgrammar/src/lib/yacc/ast.rs @@ -190,7 +190,7 @@ pub struct Rule { pub struct Production { pub symbols: Vec, pub precedence: Option, - pub action: Option, + pub action: Option<(String, Span)>, pub prod_span: Span, } @@ -271,7 +271,7 @@ impl GrammarAST { rule_name: String, symbols: Vec, precedence: Option, - action: Option, + action: Option<(String, Span)>, prod_span: Span, ) { self.rules[&rule_name].pidxs.push(self.prods.len()); diff --git a/cfgrammar/src/lib/yacc/grammar.rs b/cfgrammar/src/lib/yacc/grammar.rs index 0c2634249..ce323295e 100644 --- a/cfgrammar/src/lib/yacc/grammar.rs +++ b/cfgrammar/src/lib/yacc/grammar.rs @@ -81,6 +81,8 @@ pub struct YaccGrammar { implicit_rule: Option>, /// User defined Rust programs which can be called within actions actions: Box<[Option]>, + /// Spans for each action. + action_spans: Box<[Option]>, /// A `(name, type)` pair defining an extra parameter to pass to action functions. parse_param: Option<(String, String)>, /// Generic parameters (types and lifetimes) to pass to action functions. 
@@ -131,6 +133,7 @@ where prod_precs: Decode::decode(decoder)?, implicit_rule: Decode::decode(decoder)?, actions: Decode::decode(decoder)?, + action_spans: Decode::decode(decoder)?, parse_param: Decode::decode(decoder)?, parse_generics: Decode::decode(decoder)?, programs: Decode::decode(decoder)?, @@ -170,6 +173,7 @@ where prod_precs: ::bincode::BorrowDecode::<'_, __Context>::borrow_decode(decoder)?, implicit_rule: ::bincode::BorrowDecode::<'_, __Context>::borrow_decode(decoder)?, actions: ::bincode::BorrowDecode::<'_, __Context>::borrow_decode(decoder)?, + action_spans: ::bincode::BorrowDecode::<'_, __Context>::borrow_decode(decoder)?, parse_param: ::bincode::BorrowDecode::<'_, __Context>::borrow_decode(decoder)?, parse_generics: ::bincode::BorrowDecode::<'_, __Context>::borrow_decode(decoder)?, programs: ::bincode::BorrowDecode::<'_, __Context>::borrow_decode(decoder)?, @@ -328,6 +332,7 @@ where let mut prod_precs: Vec>> = vec![None; ast.prods.len()]; let mut prods_rules = vec![None; ast.prods.len()]; let mut actions = vec![None; ast.prods.len()]; + let mut action_spans = vec![None; ast.prods.len()]; let mut actiontypes = vec![None; rule_names.len()]; let (start_name, _) = ast.start.as_ref().unwrap(); for (astrulename, _) in &rule_names { @@ -419,8 +424,9 @@ where prods[pidx] = Some(prod); prod_precs[pidx] = Some(prec.map(|(prec, _)| prec)); prods_rules[pidx] = Some(ridx); - if let Some(ref s) = astprod.action { + if let Some((s, span)) = &astprod.action { actions[pidx] = Some(s.clone()); + action_spans[pidx] = Some(*span); } } } @@ -459,6 +465,7 @@ where prod_precs: prod_precs.into_iter().map(Option::unwrap).collect(), implicit_rule: implicit_rule.map(|x| rule_map[&x]), actions: actions.into_boxed_slice(), + action_spans: action_spans.into_boxed_slice(), parse_param: ast.parse_param.clone(), parse_generics: ast.parse_generics.clone(), programs: ast.programs.clone(), @@ -625,6 +632,10 @@ where &self.actions[usize::from(pidx)] } + pub fn action_span(&self, pidx: 
PIdx) -> Option { + self.action_spans[usize::from(pidx)] + } + pub fn actiontype(&self, ridx: RIdx) -> &Option { &self.actiontypes[usize::from(ridx)] } diff --git a/cfgrammar/src/lib/yacc/parser.rs b/cfgrammar/src/lib/yacc/parser.rs index 4cd5ca402..b3a95c715 100644 --- a/cfgrammar/src/lib/yacc/parser.rs +++ b/cfgrammar/src/lib/yacc/parser.rs @@ -734,10 +734,13 @@ impl YaccParser<'_> { pos_prod_end = Some(k); i = k; } else if self.lookahead_is("{", i).is_some() { + let pos_action_start = i + 1; pos_prod_end = Some(i); + // With j the location of the right brace, i the location of the left brace. let (j, a) = self.parse_action(i)?; i = self.parse_ws(j, true)?; - action = Some(a); + let action_span = Span::new(pos_action_start, pos_action_start + a.len()); + action = Some((a, action_span)); if !(self.lookahead_is("|", i).is_some() || self.lookahead_is(";", i).is_some()) { return Err(self.mk_error(YaccGrammarErrorKind::ProductionNotTerminated, i)); @@ -2284,13 +2287,15 @@ x" ", ) .unwrap(); + let action_str = "println!(\"test\");".to_string(); assert_eq!( grm.prods[grm.rules["A"].pidxs[0]].action, - Some("println!(\"test\");".to_string()) + Some((action_str.clone(), Span::new(34, 34 + action_str.len()))) ); + let action_str = "add($1, $2);".to_string(); assert_eq!( grm.prods[grm.rules["B"].pidxs[0]].action, - Some("add($1, $2);".to_string()) + Some((action_str.clone(), Span::new(90, 90 + action_str.len()))) ); assert_eq!(grm.prods[grm.rules["B"].pidxs[1]].action, None); } @@ -2302,9 +2307,10 @@ x" "%%A: '_' {(); // 🦀};", ) .unwrap(); + let action_str = "(); // 🦀".to_string(); assert_eq!( grm.prods[grm.rules["A"].pidxs[0]].action, - Some("(); // 🦀".to_string()) + Some((action_str.clone(), Span::new(10, 10 + action_str.len()))) ); } diff --git a/lrpar/src/lib/ctbuilder.rs b/lrpar/src/lib/ctbuilder.rs index 726908d1c..ec055afba 100644 --- a/lrpar/src/lib/ctbuilder.rs +++ b/lrpar/src/lib/ctbuilder.rs @@ -24,7 +24,7 @@ use crate::unstable_api::UnstableApi; use 
bincode::{Decode, Encode, decode_from_slice, encode_to_vec}; use cfgrammar::{ - Location, RIdx, Symbol, + Location, RIdx, Span, Symbol, header::{GrmtoolsSectionParser, Header, HeaderValue, Value}, markmap::{Entry, MergeBehavior}, yacc::{YaccGrammar, YaccKind, YaccOriginalActionKind, ast::ASTWithValidityInfo}, @@ -810,6 +810,7 @@ where &derived_mod_name, outp, &format!("/* CACHE INFORMATION {} */\n", cache), + &yacc_diag, )?; let conflicts = if stable.conflicts().is_some() { Some((sgraph, stable)) @@ -937,13 +938,14 @@ where mod_name: &str, outp_rs: P, cache: &str, + diag: &SpannedDiagnosticFormatter, ) -> Result<(), Box> { let visibility = self.visibility.clone(); let user_actions = if let Some( YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools, ) = self.yacckind { - Some(self.gen_user_actions(grm)?) + Some(self.gen_user_actions(grm, diag)?) } else { None }; @@ -1419,7 +1421,11 @@ where } /// Generate the user action functions (if any). - fn gen_user_actions(&self, grm: &YaccGrammar) -> Result> { + fn gen_user_actions( + &self, + grm: &YaccGrammar, + diag: &SpannedDiagnosticFormatter, + ) -> Result> { let programs = grm .programs() .as_ref() @@ -1520,10 +1526,16 @@ where write!(outs, "{prefix}arg_", prefix = ACTION_PREFIX).ok(); last = last + off + "$".len(); } else { - panic!( - "Unknown text following '$' operator: {}", - &pre_action[last + off..] - ); + let span = grm.action_span(pidx).unwrap(); + let inner_span = + Span::new(span.start() + last + off + "$".len(), span.end()); + let mut s = String::from("\n"); + s.push_str(&diag.underline_span_with_text( + inner_span, + "Unknown text following '$'".to_string(), + '^', + )); + return Err(ErrorString(s).into()); } } None => {