-
Notifications
You must be signed in to change notification settings - Fork 14
Expand file tree
/
Copy pathcli_usage_lexer.py
More file actions
115 lines (101 loc) · 3.78 KB
/
cli_usage_lexer.py
File metadata and controls
115 lines (101 loc) · 3.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
"""Pygments lexer for CLI usage/help output.
This module provides a custom Pygments lexer for highlighting command-line
usage text typically generated by argparse, getopt, or similar libraries.
"""
from __future__ import annotations
from pygments.lexer import RegexLexer, bygroups, include
from pygments.token import Generic, Name, Operator, Punctuation, Text, Whitespace
class CLIUsageLexer(RegexLexer):
    """Lexer for CLI usage/help text (argparse, etc.).

    Highlights usage patterns including options, arguments, and meta-variables.

    Examples
    --------
    >>> from pygments.token import Token
    >>> lexer = CLIUsageLexer()
    >>> tokens = list(lexer.get_tokens("usage: cmd [-h]"))
    >>> tokens[0]
    (Token.Generic.Heading, 'usage:')
    >>> tokens[2]
    (Token.Name.Label, 'cmd')
    """

    # Standard Pygments lexer metadata: display name, selection aliases,
    # and MIME types.  No filename globs -- this lexer is chosen explicitly
    # by alias, never by file extension.
    name = "CLI Usage"
    aliases = ["cli-usage", "usage"]  # noqa: RUF012
    filenames: list[str] = []  # noqa: RUF012
    mimetypes = ["text/x-cli-usage"]  # noqa: RUF012

    # State machine: "root" handles line starts, "inline" holds the shared
    # per-token rules, and "optional" tracks (possibly nested) [...] groups.
    # NOTE: rule order matters -- Pygments tries patterns top to bottom, so
    # the option-with-value rules must precede the standalone option rules.
    tokens = {  # noqa: RUF012
        "root": [
            # "usage:" at start of line
            (r"^(usage:)(\s+)", bygroups(Generic.Heading, Whitespace)),  # type: ignore[no-untyped-call]
            # Continuation lines (leading whitespace for wrapped usage)
            (r"^(\s+)(?=\S)", Whitespace),
            include("inline"),
        ],
        "inline": [
            # Whitespace
            (r"\s+", Whitespace),
            # Long options with = value (e.g., --log-level=VALUE)
            (
                r"(--[a-zA-Z0-9][-a-zA-Z0-9]*)(=)([A-Z][A-Z0-9_]*|[a-z][-a-z0-9]*)",
                bygroups(Name.Tag, Operator, Name.Variable),  # type: ignore[no-untyped-call]
            ),
            # Long options standalone
            (r"--[a-zA-Z0-9][-a-zA-Z0-9]*", Name.Tag),
            # Short options with space-separated value (e.g., -S socket-path)
            (
                r"(-[a-zA-Z0-9])(\s+)([A-Z][A-Z0-9_]*|[a-z][-a-z0-9]*)",
                bygroups(Name.Attribute, Whitespace, Name.Variable),  # type: ignore[no-untyped-call]
            ),
            # Short options standalone
            (r"-[a-zA-Z0-9]", Name.Attribute),
            # UPPERCASE meta-variables (COMMAND, FILE, PATH)
            # NOTE: requires >= 2 characters, so a single-letter metavar
            # like "N" falls through to the catch-all rules below.
            (r"\b[A-Z][A-Z0-9_]+\b", Name.Constant),
            # Opening bracket - enter optional state (so nesting depth is
            # tracked via Pygments' state stack rather than by counting here)
            (r"\[", Punctuation, "optional"),
            # Closing bracket (fallback for unmatched)
            (r"\]", Punctuation),
            # Choice separator (pipe)
            (r"\|", Operator),
            # Parentheses for grouping
            (r"[()]", Punctuation),
            # Positional/command names (lowercase with dashes)
            (r"\b[a-z][-a-z0-9]*\b", Name.Label),
            # Catch-all for any other text
            (r"[^\s\[\]|()]+", Text),
        ],
        "optional": [
            # Nested optional bracket: push another "optional" frame
            (r"\[", Punctuation, "#push"),
            # End optional: pop back to the enclosing state
            (r"\]", Punctuation, "#pop"),
            # Contents use inline rules
            include("inline"),
        ],
    }
def tokenize_usage(text: str) -> list[tuple[str, str]]:
    """Tokenize CLI usage text into ``(token_type, value)`` string pairs.

    Parameters
    ----------
    text : str
        CLI usage text to tokenize.

    Returns
    -------
    list[tuple[str, str]]
        List of (token_type_name, text_value) tuples.

    Examples
    --------
    >>> result = tokenize_usage("usage: cmd [-h]")
    >>> result[0]
    ('Token.Generic.Heading', 'usage:')
    >>> result[2]
    ('Token.Name.Label', 'cmd')
    >>> result[4]
    ('Token.Punctuation', '[')
    >>> result[5]
    ('Token.Name.Attribute', '-h')
    >>> result[6]
    ('Token.Punctuation', ']')
    """
    usage_lexer = CLIUsageLexer()
    pairs: list[tuple[str, str]] = []
    # str() renders a Pygments token type in its dotted form,
    # e.g. Token.Name.Label -> 'Token.Name.Label'.
    for token_type, token_value in usage_lexer.get_tokens(text):
        pairs.append((str(token_type), token_value))
    return pairs