1- """Plugin to treat uppercase HTML tags as block-level raw HTML.
1+ """Plugin to treat uppercase HTML tags as block-level raw HTML, later
2+ to be rendered by Jx (if imports are set up).
23
34This allows custom component tags like <Card>, <Test>, <Header> to be
45treated as block HTML when they appear on their own (with blank lines
56around them), preventing markdown processing inside them.
67"""
78import re
9+ import typing as t
810
911from mistune import BlockParser , BlockState
1012from mistune .markdown import Markdown
1113
1214
13- def parse_custom_html (
14- block : BlockParser , m : re .Match [str ], state : BlockState
15- ) -> int :
15+ if t .TYPE_CHECKING :
16+ from mistune .core import BaseRenderer
17+
18+
19+ def parse_mdjx (block : BlockParser , m : re .Match [str ], state : BlockState ) -> int :
1620 """Parse uppercase HTML tags as block HTML."""
1721 text = m .group (0 )
1822 end_pos = m .end () + 1 # Position after trailing newline
23+ state .append_token ({"type" : "mdjx" , "raw" : text })
24+ return end_pos
1925
20- # Check if another uppercase tag follows immediately (no blank line).
21- # If so, don't add newline to raw (renderer will add one anyway).
22- remaining = state .src [end_pos :]
23- if re .match (r"[ ]{0,3}<[A-Z]" , remaining ):
24- # Another uppercase tag follows - don't add extra newline
25- state .append_token ({"type" : "block_html" , "raw" : text })
26- else :
27- # Followed by blank line or other content - add newline for proper spacing
28- state .append_token ({"type" : "block_html" , "raw" : text + "\n " })
2926
30- return end_pos
27+ def render_mdjx (renderer : "BaseRenderer" , raw : str ) -> str :
28+ return raw + "\n "
3129
3230
33- def custom_block_html (md : Markdown ) -> None :
31+ def mdjx (md : Markdown ) -> None :
3432 """Register the custom block HTML rule.
3533
3634 This rule matches HTML tags that start with an uppercase letter
@@ -40,15 +38,26 @@ def custom_block_html(md: Markdown) -> None:
4038 The content inside these tags is NOT processed as markdown.
4139 """
4240 # Pattern to match uppercase tags as block HTML
41+ # Supports both self-closing tags (<Tag />) and paired tags (<Tag>...</Tag>)
4342 # - ^[ ]{0,3} - start of line with up to 3 spaces (standard block indent)
43+ # Self-closing branch:
44+ # - <[A-Z][a-zA-Z0-9]* - opening tag starting with uppercase
45+ # - [^>]* - optional attributes (anything except >)
46+ # - /> - self-closing end
47+ # Paired tag branch:
4448 # - <(?P<_customtag>[A-Z][a-zA-Z0-9]*) - opening tag starting with uppercase
4549 # - (?:\s[^>]*)? - optional attributes
4650 # - > - close of opening tag
4751 # - [\s\S]*? - content (lazy match, including newlines)
4852 # - </(?P=_customtag)> - matching closing tag (backreference)
4953 # - [ \t]*$ - optional trailing whitespace, end of line
5054 pattern = (
51- r"^[ ]{0,3}<(?P<_customtag>[A-Z][a-zA-Z0-9]*)(?:\s[^>]*)?>"
52- r"[\s\S]*?</(?P=_customtag)>[ \t]*$"
55+ r"^[ ]{0,3}(?:"
56+ r"<[A-Z][a-zA-Z0-9]*[^>]*/>" # Self-closing tag
57+ r"|"
58+ r"<(?P<_customtag>[A-Z][a-zA-Z0-9]*)(?:\s[^>]*)?>[\s\S]*?</(?P=_customtag)>" # Paired tags
59+ r")[ \t]*$"
5360 )
54- md .block .register ("custom_html" , pattern , parse_custom_html , before = "raw_html" )
61+ md .block .register ("mdjx" , pattern , parse_mdjx , before = "raw_html" )
62+ if md .renderer and md .renderer .NAME == "html" :
63+ md .renderer .register ("mdjx" , render_mdjx )
0 commit comments