From c6a7430ddef3625a8b101538179db7a4820ac596 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 4 May 2026 21:13:06 +0000
Subject: [PATCH 1/6] Initial plan


From 7b870ef5f046aca572b4ff50df0bae8781b1bfac Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 4 May 2026 21:15:55 +0000
Subject: [PATCH 2/6] =?UTF-8?q?=E2=9C=A8=20NEW:=20Allow=20plugins=20to=20r?=
 =?UTF-8?q?egister=20inline=20terminator=20characters?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Agent-Logs-Url: https://github.com/executablebooks/markdown-it-py/sessions/e9a49254-6b3a-4ecc-9b57-84f6df4e6ccd

Co-authored-by: chrisjsewell <2997570+chrisjsewell@users.noreply.github.com>
---
 docs/contributing.md                   | 11 +++--
 markdown_it/parser_inline.py           | 56 ++++++++++++++++++++++++++
 markdown_it/rules_inline/text.py       | 41 +------------------
 tests/test_api/test_plugin_creation.py | 43 ++++++++++++++++++++
 4 files changed, 108 insertions(+), 43 deletions(-)

diff --git a/docs/contributing.md b/docs/contributing.md
index 3a6d6aeb..4e3e3149 100644
--- a/docs/contributing.md
+++ b/docs/contributing.md
@@ -118,7 +118,12 @@ __Note:__ Don't try to replace text with HTML markup! That's not secure.
 
 ### Why is my inline rule not executed?
 
-The inline parser skips pieces of texts to optimize speed. It stops only on [a small set of chars](https://github.com/markdown-it/markdown-it/blob/master/lib/rules_inline/text.mjs), which can be tokens. We did not made this list extensible for performance reasons too.
+The inline parser skips pieces of text to optimize speed. It stops only on [a small set of chars](https://github.com/executablebooks/markdown-it-py/blob/master/markdown_it/parser_inline.py), which can start tokens.
 
-If you are absolutely sure that something important is missing there - create a
-ticket and we will consider adding it as a new charcode.
+If your inline rule needs to trigger on a character that is not in the default terminator set, you can register it via `md.inline.add_terminator_char`:
+
+```python
+def my_plugin(md: MarkdownIt) -> None:
+    md.inline.add_terminator_char("w")  # stop text rule on 'w'
+    md.inline.ruler.push("my_rule", my_inline_rule)
+```
diff --git a/markdown_it/parser_inline.py b/markdown_it/parser_inline.py
index 26ec2e63..2cabaf67 100644
--- a/markdown_it/parser_inline.py
+++ b/markdown_it/parser_inline.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import re
 from collections.abc import Callable
 from typing import TYPE_CHECKING
 
@@ -15,6 +16,39 @@
     from markdown_it import MarkdownIt
 
 
+# Default set of characters that terminate a text token and allow inline rules to fire.
+# '{}$%@~+=:' reserved for extensions.
+# Note: Don't confuse with "Markdown ASCII Punctuation" chars.
+# http://spec.commonmark.org/0.15/#ascii-punctuation-character
+_DEFAULT_TERMINATORS: frozenset[str] = frozenset(
+    {
+        "\n",
+        "!",
+        "#",
+        "$",
+        "%",
+        "&",
+        "*",
+        "+",
+        "-",
+        ":",
+        "<",
+        "=",
+        ">",
+        "@",
+        "[",
+        "\\",
+        "]",
+        "^",
+        "_",
+        "`",
+        "{",
+        "}",
+        "~",
+    }
+)
+
+
 # Parser rules
 RuleFuncInlineType = Callable[[StateInline, bool], bool]
 """(state: StateInline, silent: bool) -> matched: bool)
@@ -61,6 +95,28 @@ def __init__(self) -> None:
         self.ruler2 = Ruler[RuleFuncInline2Type]()
         for name, rule2 in _rules2:
             self.ruler2.push(name, rule2)
+        # Characters that stop the text rule, allowing other inline rules to fire.
+        self._terminator_chars: set[str] = set(_DEFAULT_TERMINATORS)
+        # Pre-compiled regex is kept in sync with _terminator_chars (updated eagerly in
+        # add_terminator_char) so there is no per-call None-check overhead in the hot path.
+        self.terminator_re: re.Pattern[str] = self._build_terminator_re()
+
+    def _build_terminator_re(self) -> re.Pattern[str]:
+        return re.compile(
+            "[" + re.escape("".join(sorted(self._terminator_chars))) + "]"
+        )
+
+    def add_terminator_char(self, ch: str) -> None:
+        """Register a character that stops the ``text`` rule, allowing inline rules to fire.
+
+        This lets plugins declare which characters their inline rules react to,
+        mirroring the ``MARKER`` mechanism in the Rust markdown-it implementation.
+
+        :param ch: A single character to add to the terminator set.
+        """
+        if ch not in self._terminator_chars:
+            self._terminator_chars.add(ch)
+            self.terminator_re = self._build_terminator_re()
 
     def skipToken(self, state: StateInline) -> None:
         """Skip single token by running all rules in validation mode;
diff --git a/markdown_it/rules_inline/text.py b/markdown_it/rules_inline/text.py
index 18b2fcc7..ef0cc9ce 100644
--- a/markdown_it/rules_inline/text.py
+++ b/markdown_it/rules_inline/text.py
@@ -1,54 +1,15 @@
-import functools
-import re
-
 # Skip text characters for text token, place those to pending buffer
 # and increment current pos
 from .state_inline import StateInline
 
 # Rule to skip pure text
-# '{}$%@~+=:' reserved for extensions
-
-# !!!! Don't confuse with "Markdown ASCII Punctuation" chars
-# http://spec.commonmark.org/0.15/#ascii-punctuation-character
-
-
-_TerminatorChars = {
-    "\n",
-    "!",
-    "#",
-    "$",
-    "%",
-    "&",
-    "*",
-    "+",
-    "-",
-    ":",
-    "<",
-    "=",
-    ">",
-    "@",
-    "[",
-    "\\",
-    "]",
-    "^",
-    "_",
-    "`",
-    "{",
-    "}",
-    "~",
-}
-
-
-@functools.cache
-def _terminator_char_regex() -> re.Pattern[str]:
-    return re.compile("[" + re.escape("".join(_TerminatorChars)) + "]")
 
 
 def text(state: StateInline, silent: bool) -> bool:
     pos = state.pos
     posMax = state.posMax
 
-    terminator_char = _terminator_char_regex().search(state.src, pos)
+    terminator_char = state.md.inline.terminator_re.search(state.src, pos)
     pos = terminator_char.start() if terminator_char else posMax
 
     if pos == state.pos:
diff --git a/tests/test_api/test_plugin_creation.py b/tests/test_api/test_plugin_creation.py
index d555be18..611ca4c1 100644
--- a/tests/test_api/test_plugin_creation.py
+++ b/tests/test_api/test_plugin_creation.py
@@ -89,3 +89,46 @@ def _plugin(_md: MarkdownIt) -> None:
 
     MarkdownIt().use(_plugin).parse("a")
     assert "plugin called" in capsys.readouterr().out
+
+
+def test_add_terminator_char():
+    """Test that add_terminator_char stops the text rule on a new character."""
+    hit_positions = []
+
+    def w_rule(state, silent):
+        if state.src[state.pos] != "w":
+            return False
+        hit_positions.append(state.pos)
+        state.pos += 1
+        return True
+
+    def _plugin(_md: MarkdownIt) -> None:
+        _md.inline.add_terminator_char("w")
+        _md.inline.ruler.before("text", "w_rule", w_rule)
+
+    md = MarkdownIt().use(_plugin)
+
+    # Without the terminator 'w' would be consumed as plain text;
+    # with it the rule fires for each 'w'.
+    md.render("awb")
+    assert len(hit_positions) > 0
+
+
+def test_add_terminator_char_idempotent():
+    """add_terminator_char with an already-present char should not rebuild the regex."""
+    md = MarkdownIt()
+    original_re = md.inline.terminator_re
+
+    # '\n' is already in the default set – adding it again must not rebuild
+    md.inline.add_terminator_char("\n")
+    assert md.inline.terminator_re is original_re
+
+
+def test_add_terminator_char_rebuilds():
+    """add_terminator_char with a new char should rebuild the regex."""
+    md = MarkdownIt()
+    original_re = md.inline.terminator_re
+
+    md.inline.add_terminator_char("w")
+    assert md.inline.terminator_re is not original_re
+    assert "w" in md.inline._terminator_chars

From 3a1fb69b75fdbbf67635f8bb1e601d667e87c4ee Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 4 May 2026 21:16:57 +0000
Subject: [PATCH 3/6] Address code review: remove sorted() overhead, tighten
 test assertion

Agent-Logs-Url: https://github.com/executablebooks/markdown-it-py/sessions/e9a49254-6b3a-4ecc-9b57-84f6df4e6ccd

Co-authored-by: chrisjsewell <2997570+chrisjsewell@users.noreply.github.com>
---
 markdown_it/parser_inline.py           | 2 +-
 tests/test_api/test_plugin_creation.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/markdown_it/parser_inline.py b/markdown_it/parser_inline.py
index 2cabaf67..0f0e5029 100644
--- a/markdown_it/parser_inline.py
+++ b/markdown_it/parser_inline.py
@@ -103,7 +103,7 @@ def __init__(self) -> None:
 
     def _build_terminator_re(self) -> re.Pattern[str]:
         return re.compile(
-            "[" + re.escape("".join(sorted(self._terminator_chars))) + "]"
+            "[" + re.escape("".join(self._terminator_chars)) + "]"
         )
 
     def add_terminator_char(self, ch: str) -> None:
diff --git a/tests/test_api/test_plugin_creation.py b/tests/test_api/test_plugin_creation.py
index 611ca4c1..41e5015e 100644
--- a/tests/test_api/test_plugin_creation.py
+++ b/tests/test_api/test_plugin_creation.py
@@ -109,9 +109,9 @@ def _plugin(_md: MarkdownIt) -> None:
     md = MarkdownIt().use(_plugin)
 
     # Without the terminator 'w' would be consumed as plain text;
-    # with it the rule fires for each 'w'.
+    # with it the rule fires exactly once, on the 'w' at index 1 of "awb".
     md.render("awb")
-    assert len(hit_positions) > 0
+    assert hit_positions == [1]
 
 
 def test_add_terminator_char_idempotent():

From 6100aeb9fa02367a9ea23f30e9de3349b751c39d Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 5 May 2026 03:16:10 +0000
Subject: [PATCH 4/6] Avoid per-instance set copy; share module-level default
 terminator regex

Agent-Logs-Url: https://github.com/executablebooks/markdown-it-py/sessions/913c6f4f-7b8b-496d-9626-f0e5b7c8bc0a

Co-authored-by: chrisjsewell <2997570+chrisjsewell@users.noreply.github.com>
---
 markdown_it/parser_inline.py           | 30 +++++++++++++++-----------
 tests/test_api/test_plugin_creation.py |  2 +-
 2 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/markdown_it/parser_inline.py b/markdown_it/parser_inline.py
index 0f0e5029..938c5f93 100644
--- a/markdown_it/parser_inline.py
+++ b/markdown_it/parser_inline.py
@@ -48,6 +48,12 @@
     }
 )
 
+# Pre-compiled regex for the default terminator set.  Shared across all ParserInline
+# instances that have not had extra chars added, so __init__ pays no allocation cost.
+_default_terminator_re: re.Pattern[str] = re.compile(
+    "[" + re.escape("".join(_DEFAULT_TERMINATORS)) + "]"
+)
+
 
 # Parser rules
 RuleFuncInlineType = Callable[[StateInline, bool], bool]
@@ -96,15 +102,11 @@ def __init__(self) -> None:
         for name, rule2 in _rules2:
             self.ruler2.push(name, rule2)
         # Characters that stop the text rule, allowing other inline rules to fire.
-        self._terminator_chars: set[str] = set(_DEFAULT_TERMINATORS)
-        # Pre-compiled regex is kept in sync with _terminator_chars (updated eagerly in
-        # add_terminator_char) so there is no per-call None-check overhead in the hot path.
-        self.terminator_re: re.Pattern[str] = self._build_terminator_re()
-
-    def _build_terminator_re(self) -> re.Pattern[str]:
-        return re.compile(
-            "[" + re.escape("".join(self._terminator_chars)) + "]"
-        )
+        # _extra_terminator_chars stays empty until add_terminator_char() is called
+        # with a char outside the defaults, keeping __init__ allocation-free.
+        self._extra_terminator_chars: set[str] = set()
+        # Pre-compiled regex shared with all default instances (no copy in the common path).
+        self.terminator_re: re.Pattern[str] = _default_terminator_re
 
     def add_terminator_char(self, ch: str) -> None:
         """Register a character that stops the ``text`` rule, allowing inline rules to fire.
@@ -114,9 +116,13 @@ def add_terminator_char(self, ch: str) -> None:
 
         :param ch: A single character to add to the terminator set.
         """
-        if ch not in self._terminator_chars:
-            self._terminator_chars.add(ch)
-            self.terminator_re = self._build_terminator_re()
+        if ch not in _DEFAULT_TERMINATORS and ch not in self._extra_terminator_chars:
+            self._extra_terminator_chars.add(ch)
+            self.terminator_re = re.compile(
+                "["
+                + re.escape("".join(_DEFAULT_TERMINATORS | self._extra_terminator_chars))
+                + "]"
+            )
 
     def skipToken(self, state: StateInline) -> None:
         """Skip single token by running all rules in validation mode;
diff --git a/tests/test_api/test_plugin_creation.py b/tests/test_api/test_plugin_creation.py
index 41e5015e..1970ced2 100644
--- a/tests/test_api/test_plugin_creation.py
+++ b/tests/test_api/test_plugin_creation.py
@@ -131,4 +131,4 @@ def test_add_terminator_char_rebuilds():
 
     md.inline.add_terminator_char("w")
     assert md.inline.terminator_re is not original_re
-    assert "w" in md.inline._terminator_chars
+    assert "w" in md.inline._extra_terminator_chars

From d0d8d464edc3143b8081f8df0291d189b9c0b16a Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 5 May 2026 03:40:12 +0000
Subject: [PATCH 5/6] Make default terminator regex a lazily-computed cached
 function

Agent-Logs-Url: https://github.com/executablebooks/markdown-it-py/sessions/04e84ce1-b921-4475-9f26-c0b296e29e33

Co-authored-by: chrisjsewell <2997570+chrisjsewell@users.noreply.github.com>
---
 markdown_it/parser_inline.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/markdown_it/parser_inline.py b/markdown_it/parser_inline.py
index 938c5f93..632a5442 100644
--- a/markdown_it/parser_inline.py
+++ b/markdown_it/parser_inline.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import functools
 import re
 from collections.abc import Callable
 from typing import TYPE_CHECKING
@@ -48,11 +49,14 @@
     }
 )
 
-# Pre-compiled regex for the default terminator set.  Shared across all ParserInline
-# instances that have not had extra chars added, so __init__ pays no allocation cost.
-_default_terminator_re: re.Pattern[str] = re.compile(
-    "[" + re.escape("".join(_DEFAULT_TERMINATORS)) + "]"
-)
+# Lazily compiled regex for the default terminator set.  The @cache ensures it is
+# compiled at most once (on first ParserInline instantiation) and shared across all
+# instances that have not added extra chars, keeping __init__ cost near zero.
+@functools.cache
+def _default_terminator_re() -> re.Pattern[str]:
+    return re.compile(
+        "[" + re.escape("".join(_DEFAULT_TERMINATORS)) + "]"
+    )
 
 
 # Parser rules
@@ -106,7 +110,7 @@ def __init__(self) -> None:
         # with a char outside the defaults, keeping __init__ allocation-free.
         self._extra_terminator_chars: set[str] = set()
         # Pre-compiled regex shared with all default instances (no copy in the common path).
-        self.terminator_re: re.Pattern[str] = _default_terminator_re
+        self.terminator_re: re.Pattern[str] = _default_terminator_re()
 
     def add_terminator_char(self, ch: str) -> None:
         """Register a character that stops the ``text`` rule, allowing inline rules to fire.

From d5cf7ff5a663d23dac72bd68706a9671ac9e0542 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 5 May 2026 06:56:43 +0000
Subject: [PATCH 6/6] Fix pre-commit: ruff import ordering and formatting

Agent-Logs-Url: https://github.com/executablebooks/markdown-it-py/sessions/1c71001c-d7bd-4b35-8682-9c0afb71b1a9

Co-authored-by: chrisjsewell <2997570+chrisjsewell@users.noreply.github.com>
---
 markdown_it/parser_inline.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/markdown_it/parser_inline.py b/markdown_it/parser_inline.py
index 632a5442..8fabb988 100644
--- a/markdown_it/parser_inline.py
+++ b/markdown_it/parser_inline.py
@@ -2,9 +2,9 @@
 
 from __future__ import annotations
 
+from collections.abc import Callable
 import functools
 import re
-from collections.abc import Callable
 from typing import TYPE_CHECKING
 
 from . import rules_inline
@@ -49,14 +49,13 @@
     }
 )
 
+
 # Lazily compiled regex for the default terminator set.  The @cache ensures it is
 # compiled at most once (on first ParserInline instantiation) and shared across all
 # instances that have not added extra chars, keeping __init__ cost near zero.
 @functools.cache
 def _default_terminator_re() -> re.Pattern[str]:
-    return re.compile(
-        "[" + re.escape("".join(_DEFAULT_TERMINATORS)) + "]"
-    )
+    return re.compile("[" + re.escape("".join(_DEFAULT_TERMINATORS)) + "]")
 
 
 # Parser rules
@@ -124,7 +123,9 @@ def add_terminator_char(self, ch: str) -> None:
             self._extra_terminator_chars.add(ch)
             self.terminator_re = re.compile(
                 "["
-                + re.escape("".join(_DEFAULT_TERMINATORS | self._extra_terminator_chars))
+                + re.escape(
+                    "".join(_DEFAULT_TERMINATORS | self._extra_terminator_chars)
+                )
                 + "]"
             )