Skip to content
Open
18 changes: 15 additions & 3 deletions tools/hrw4u/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ SCRIPT_KG=scripts/hrw4u-kg
SHARED_FILES=src/common.py \
src/debugging.py \
src/errors.py \
src/formatters.py \
src/states.py \
src/tables.py \
src/types.py \
Expand Down Expand Up @@ -104,10 +105,21 @@ INIT_HRW4U=$(PKG_DIR_HRW4U)/__init__.py
INIT_U4WRH=$(PKG_DIR_U4WRH)/__init__.py
INIT_LSP=$(PKG_DIR_LSP)/__init__.py

.PHONY: all gen gen-fwd gen-inv copy-src test clean build package env setup-deps activate update coverage coverage-open
.PHONY: all gen gen-fwd gen-inv copy-src check-antlr test clean build package env setup-deps activate update coverage coverage-open

all: gen

# Fail fast with a helpful message if the ANTLR generator is not on PATH.
# Installation is intentionally left to the user / bootstrap.sh, because
# installers vary by OS (brew on macOS, dnf/apt on Linux, CI images pin their own).
# The diagnostics are written to stderr so they never mix into captured stdout.
check-antlr:
	@command -v $(ANTLR) >/dev/null 2>&1 || { \
		echo "Error: '$(ANTLR)' not found on PATH." >&2; \
		echo "Install it first (e.g. 'brew install antlr' on macOS)," >&2; \
		echo "or run ./bootstrap.sh which also sets up Python dependencies." >&2; \
		exit 1; \
	}

# Orchestrate generation then copy sources and drop __main__.py in each package
gen: gen-fwd gen-inv copy-src $(MAIN_HRW4U) $(MAIN_U4WRH) $(MAIN_LSP) $(INIT_HRW4U) $(INIT_U4WRH) $(INIT_LSP)

Expand Down Expand Up @@ -135,7 +147,7 @@ $(INIT_LSP): | $(PKG_DIR_LSP)
touch $@

# Generate forward parser/lexer into build/hrw4u and build/hrw4u-lsp
gen-fwd: $(ANTLR_FILES_FWD)
gen-fwd: check-antlr $(ANTLR_FILES_FWD)

$(ANTLR_FILES_FWD): $(GRAMMAR_FWD)
@mkdir -p $(PKG_DIR_HRW4U)
Expand All @@ -144,7 +156,7 @@ $(ANTLR_FILES_FWD): $(GRAMMAR_FWD)
# LSP no longer generates its own ANTLR files - it imports from hrw4u

# Generate inverse parser/lexer into build/u4wrh
gen-inv: $(ANTLR_FILES_INV)
gen-inv: check-antlr $(ANTLR_FILES_INV)

$(ANTLR_FILES_INV): $(GRAMMAR_INV)
@mkdir -p $(PKG_DIR_U4WRH)
Expand Down
100 changes: 72 additions & 28 deletions tools/hrw4u/src/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from antlr4 import InputStream, CommonTokenStream

from hrw4u.errors import Hrw4uSyntaxError, ThrowingErrorListener, ErrorCollector, CollectingErrorListener
from hrw4u.formatters import FORMATTERS, ErrorFormatter
from hrw4u.types import MagicStrings


Expand Down Expand Up @@ -112,6 +113,43 @@ def fatal(message: str) -> NoReturn:
sys.exit(1)


def _build_formatter(error_format: str) -> ErrorFormatter:
    """Return a new formatter instance for ``error_format``.

    The name is looked up in ``FORMATTERS``; a name that is not registered
    resolves to the ``"plain"`` entry.
    """
    formatter_factory = FORMATTERS.get(error_format, FORMATTERS["plain"])
    return formatter_factory()


def emit_fatal_message(error_format: str, message: str, filename: str = SystemDefaults.DEFAULT_FILENAME) -> NoReturn:
    """Report a non-syntax failure (I/O, argument handling) on stderr and exit with status 1.

    With ``error_format == 'plain'`` the message is printed verbatim, which
    keeps the legacy bare-string output. Any other format wraps the message in
    a synthetic ``Hrw4uSyntaxError`` placed at line 0, column 0 of ``filename``
    and renders it through an ``ErrorCollector`` that uses the selected
    formatter, so structured consumers see one schema for every error source.
    """
    if error_format != 'plain':
        # Synthetic diagnostic: no real position or source line is available.
        synthetic = Hrw4uSyntaxError(filename, 0, 0, message, "")
        sink = ErrorCollector(formatter=_build_formatter(error_format))
        sink.add_error(synthetic)
        rendered = sink.get_error_summary()
    else:
        rendered = message
    print(rendered, file=sys.stderr)
    sys.exit(1)


def emit_fatal_error(error_format: str, error: Hrw4uSyntaxError) -> NoReturn:
    """Report one ``Hrw4uSyntaxError`` on stderr and exit with status 1.

    With ``error_format == 'plain'`` the output is just ``str(error)`` (no
    ``Found 1 error:`` header), so existing CLI consumers keep seeing the
    legacy text. Any other format routes the error through an
    ``ErrorCollector`` configured with the selected formatter.
    """
    if error_format != 'plain':
        sink = ErrorCollector(formatter=_build_formatter(error_format))
        sink.add_error(error)
        rendered = sink.get_error_summary()
    else:
        rendered = str(error)
    print(rendered, file=sys.stderr)
    sys.exit(1)


def create_base_parser(description: str) -> tuple[argparse.ArgumentParser, argparse._MutuallyExclusiveGroup]:
"""Create base argument parser with common options."""
parser = argparse.ArgumentParser(description=description, formatter_class=argparse.RawDescriptionHelpFormatter)
Expand Down Expand Up @@ -147,13 +185,14 @@ def create_parse_tree(
parser_class: type[ParserProtocol],
error_prefix: str,
collect_errors: bool = True,
max_errors: int = 5) -> tuple[Any, ParserProtocol, ErrorCollector | None]:
max_errors: int = 5,
error_format: str = "plain") -> tuple[Any, ParserProtocol, ErrorCollector | None]:
"""Create ANTLR parse tree from input content with optional error collection."""
input_stream = InputStream(content)
error_collector = None

if collect_errors:
error_collector = ErrorCollector(max_errors=max_errors)
error_collector = ErrorCollector(max_errors=max_errors, formatter=_build_formatter(error_format))
error_listener = CollectingErrorListener(filename=filename, error_collector=error_collector)
else:
error_listener = ThrowingErrorListener(filename=filename)
Expand Down Expand Up @@ -181,15 +220,15 @@ def create_parse_tree(
error_collector.add_error(e)
return None, parser_obj, error_collector
else:
fatal(str(e))
emit_fatal_error(error_format, e)
except Exception as e:
if collect_errors:
if error_collector:
syntax_error = Hrw4uSyntaxError(filename, 0, 0, f"{error_prefix} error: {e}", "")
error_collector.add_error(syntax_error)
return None, parser_obj, error_collector
else:
fatal(f"{filename}:0:0 - {error_prefix} error: {e}")
emit_fatal_message(error_format, f"{error_prefix} error: {e}", filename=filename)


def generate_output(
Expand Down Expand Up @@ -233,7 +272,9 @@ def generate_output(
syntax_error.add_note(note)
error_collector.add_error(syntax_error)
else:
fatal(str(e))
visitor_err = e if isinstance(e, Hrw4uSyntaxError) else Hrw4uSyntaxError(
filename, 0, 0, f"Visitor error: {e}", "")
emit_fatal_error(getattr(args, 'error_format', 'plain'), visitor_err)

if error_collector and (error_collector.has_errors() or error_collector.has_warnings()):
print(error_collector.get_error_summary(), file=sys.stderr)
Expand Down Expand Up @@ -289,6 +330,16 @@ def run_main(
default=5,
dest="max_errors",
help="Maximum number of errors to report before stopping (default: 5; ignored with --stop-on-error)")
parser.add_argument(
"--error-format",
choices=sorted(FORMATTERS.keys()),
default="plain",
dest="error_format",
help=(
"Format used for error and warning output on stderr (default: plain). "
"'json' emits one compact JSON object per input (NDJSON-friendly in bulk mode); "
"'markdown' emits a rendered report suitable for PR comments and chat. "
"Columns are always 0-based."))

if add_args is not None:
add_args(parser, output_group)
Expand All @@ -309,20 +360,18 @@ def run_main(
try:
content = pre_process(content, filename, args)
except Hrw4uSyntaxError as e:
print(str(e), file=sys.stderr)
sys.exit(1)
emit_fatal_error(args.error_format, e)
tree, parser_obj, error_collector = create_parse_tree(
content, filename, lexer_class, parser_class, error_prefix, not args.stop_on_error, args.max_errors)
content, filename, lexer_class, parser_class, error_prefix, not args.stop_on_error, args.max_errors, args.error_format)
generate_output(tree, parser_obj, visitor_class, filename, args, error_collector, extra_kwargs)
return

if any(':' in f for f in args.files):
for pair in args.files:
if ':' not in pair:
print(
f"Error: Mixed formats not allowed. All files must use 'input:output' format for bulk compilation.",
file=sys.stderr)
sys.exit(1)
emit_fatal_message(
args.error_format,
"Error: Mixed formats not allowed. All files must use 'input:output' format for bulk compilation.")

input_path, output_path = pair.split(':', 1)

Expand All @@ -331,20 +380,18 @@ def run_main(
content = input_file.read()
filename = input_path
except FileNotFoundError:
print(f"Error: Input file '{input_path}' not found", file=sys.stderr)
sys.exit(1)
emit_fatal_message(args.error_format, f"Error: Input file '{input_path}' not found", filename=input_path)
except Exception as e:
print(f"Error reading '{input_path}': {e}", file=sys.stderr)
sys.exit(1)
emit_fatal_message(args.error_format, f"Error reading '{input_path}': {e}", filename=input_path)

if pre_process is not None:
try:
content = pre_process(content, filename, args)
except Hrw4uSyntaxError as e:
print(str(e), file=sys.stderr)
sys.exit(1)
emit_fatal_error(args.error_format, e)
tree, parser_obj, error_collector = create_parse_tree(
content, filename, lexer_class, parser_class, error_prefix, not args.stop_on_error, args.max_errors)
content, filename, lexer_class, parser_class, error_prefix, not args.stop_on_error, args.max_errors,
args.error_format)

try:
with open(output_path, 'w', encoding='utf-8') as output_file:
Expand All @@ -355,8 +402,7 @@ def run_main(
finally:
sys.stdout = original_stdout
except Exception as e:
print(f"Error writing to '{output_path}': {e}", file=sys.stderr)
sys.exit(1)
emit_fatal_message(args.error_format, f"Error writing to '{output_path}': {e}", filename=output_path)
else:
for i, input_path in enumerate(args.files):
if i > 0:
Expand All @@ -367,19 +413,17 @@ def run_main(
content = input_file.read()
filename = input_path
except FileNotFoundError:
print(f"Error: Input file '{input_path}' not found", file=sys.stderr)
sys.exit(1)
emit_fatal_message(args.error_format, f"Error: Input file '{input_path}' not found", filename=input_path)
except Exception as e:
print(f"Error reading '{input_path}': {e}", file=sys.stderr)
sys.exit(1)
emit_fatal_message(args.error_format, f"Error reading '{input_path}': {e}", filename=input_path)

if pre_process is not None:
try:
content = pre_process(content, filename, args)
except Hrw4uSyntaxError as e:
print(str(e), file=sys.stderr)
sys.exit(1)
emit_fatal_error(args.error_format, e)
tree, parser_obj, error_collector = create_parse_tree(
content, filename, lexer_class, parser_class, error_prefix, not args.stop_on_error, args.max_errors)
content, filename, lexer_class, parser_class, error_prefix, not args.stop_on_error, args.max_errors,
args.error_format)

generate_output(tree, parser_obj, visitor_class, filename, args, error_collector, extra_kwargs)
43 changes: 12 additions & 31 deletions tools/hrw4u/src/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,13 @@

import re
from dataclasses import dataclass
from typing import Final
from typing import Final, TYPE_CHECKING

from antlr4.error.ErrorListener import ErrorListener

if TYPE_CHECKING:
from hrw4u.formatters import ErrorFormatter

_TOKEN_NAMES: Final[dict[str, str]] = {
'QUALIFIED_IDENT': "qualified name (e.g. 'Namespace::Name')",
'IDENT': 'identifier',
Expand Down Expand Up @@ -111,6 +114,7 @@ def __init__(self, filename: str, line: int, column: int, message: str, source_l
self.filename = filename
self.line = line
self.column = column
self.message = message
self.source_line = source_line


Expand Down Expand Up @@ -166,11 +170,12 @@ def from_ctx(cls, filename: str, ctx: object, message: str) -> Warning:
class ErrorCollector:
"""Collects multiple syntax errors and warnings for comprehensive reporting."""

def __init__(self, max_errors: int = 5) -> None:
def __init__(self, max_errors: int = 5, formatter: "ErrorFormatter | None" = None) -> None:
self.errors: list[Hrw4uSyntaxError] = []
self.max_errors = max_errors
self.warnings: list[Warning] = []
self._sandbox_message: str | None = None
self._formatter = formatter

def add_error(self, error: Hrw4uSyntaxError) -> None:
self.errors.append(error)
Expand All @@ -194,35 +199,11 @@ def has_warnings(self) -> bool:
return bool(self.warnings)

def get_error_summary(self) -> str:
if not self.errors and not self.warnings:
return "No errors found."

lines: list[str] = []

if self.errors:
count = len(self.errors)
lines.append(f"Found {count} error{'s' if count > 1 else ''}:")

for error in self.errors:
lines.append(str(error))
if hasattr(error, '__notes__') and error.__notes__:
lines.extend(error.__notes__)

if self.warnings:
if self.errors:
lines.append("")
count = len(self.warnings)
lines.append(f"{count} warning{'s' if count > 1 else ''}:")
lines.extend(w.format() for w in self.warnings)

if self.at_limit:
lines.append(f"(stopped after {self.max_errors} errors)")

if self._sandbox_message:
lines.append("")
lines.append(self._sandbox_message)

return "\n".join(lines)
formatter = self._formatter
if formatter is None:
from hrw4u.formatters import PlainTextFormatter
formatter = PlainTextFormatter()
return formatter.format_errors(self.errors, self.warnings, self._sandbox_message, self.at_limit, self.max_errors)


class CollectingErrorListener(ErrorListener):
Expand Down
Loading