|
4 | 4 | import six |
5 | 5 |
|
6 | 6 |
|
| 7 | +# General-purpose regex patterns |
7 | 8 | re_convert_heading = re.compile(r'convert_h(\d+)') |
8 | 9 | re_line_with_content = re.compile(r'^(.*)', flags=re.MULTILINE) |
9 | 10 | re_whitespace = re.compile(r'[\t ]+') |
10 | 11 | re_all_whitespace = re.compile(r'[\t \r\n]+') |
11 | 12 | re_newline_whitespace = re.compile(r'[\t \r\n]*[\r\n][\t \r\n]*') |
12 | 13 | re_html_heading = re.compile(r'h[1-6]') |
13 | 14 |
|
14 | | -# extract (leading_nl, content, trailing_nl) from a string |
| 15 | +# Pattern for creating convert_<tag> function names from tag names |
| 16 | +re_make_convert_fn_name = re.compile(r'[\[\]:-]') |
| 17 | + |
| 18 | +# Extract (leading_nl, content, trailing_nl) from a string |
15 | 19 | # (functionally equivalent to r'^(\n*)(.*?)(\n*)$', but greedy is faster than reluctant here) |
16 | 20 | re_extract_newlines = re.compile(r'^(\n*)((?:.*[^\n])?)(\n*)$', flags=re.DOTALL) |
17 | 21 |
|
| 22 | +# Patterns for escaping |
| 23 | +re_chars_to_escape = re.compile(r'([]\\&<`[>~=+|])') |
| 24 | +re_dash_sequences_to_escape = re.compile(r'(\s|^)(-+(?:\s|$))') |
| 25 | +re_hashes_to_escape = re.compile(r'(\s|^)(#{1,6}(?:\s|$))') |
| 26 | +re_list_items_to_escape = re.compile(r'((?:\s|^)[0-9]{1,9})([.)](?:\s|$))') |
18 | 27 |
|
19 | 28 | # Heading styles |
20 | 29 | ATX = 'atx' |
@@ -266,7 +275,7 @@ def _can_ignore(el): |
266 | 275 | text = ''.join(child_strings) |
267 | 276 |
|
268 | 277 | # apply this tag's final conversion function |
269 | | - convert_fn_name = "convert_%s" % re.sub(r"[\[\]:-]", "_", node.name) |
| 278 | + convert_fn_name = "convert_%s" % re_make_convert_fn_name.sub('_', node.name) |
270 | 279 | convert_fn = getattr(self, convert_fn_name, None) |
271 | 280 | if convert_fn and self.should_convert_tag(node.name): |
272 | 281 | text = convert_fn(node, text, parent_tags=parent_tags) |
@@ -351,20 +360,24 @@ def escape(self, text, parent_tags): |
351 | 360 | if not text: |
352 | 361 | return '' |
353 | 362 | if self.options['escape_misc']: |
354 | | - text = re.sub(r'([]\\&<`[>~=+|])', r'\\\1', text) |
355 | | - # A sequence of one or more consecutive '-', preceded and |
356 | | - # followed by whitespace or start/end of fragment, might |
357 | | - # be confused with an underline of a header, or with a |
| 363 | + # Escape miscellaneous special Markdown characters. |
| 364 | + text = re_chars_to_escape.sub(r'\\\1', text) |
| 365 | + |
| 366 | + # Escape a sequence of one or more consecutive '-', preceded |
| 367 | + # and followed by whitespace or start/end of fragment, as it |
| 368 | + # might be confused with an underline of a header, or with a |
358 | 369 | # list marker. |
359 | | - text = re.sub(r'(\s|^)(-+(?:\s|$))', r'\1\\\2', text) |
360 | | - # A sequence of up to six consecutive '#', preceded and |
361 | | - # followed by whitespace or start/end of fragment, might |
362 | | - # be confused with an ATX heading. |
363 | | - text = re.sub(r'(\s|^)(#{1,6}(?:\s|$))', r'\1\\\2', text) |
364 | | - # '.' or ')' preceded by up to nine digits might be |
| 370 | + text = re_dash_sequences_to_escape.sub(r'\1\\\2', text) |
| 371 | + |
| 372 | + # Escape a sequence of up to six consecutive '#', preceded |
| 373 | + # and followed by whitespace or start/end of fragment, as |
| 374 | + # it might be confused with an ATX heading. |
| 375 | + text = re_hashes_to_escape.sub(r'\1\\\2', text) |
| 376 | + |
| 377 | + # Escape '.' or ')' preceded by up to nine digits, as it might be |
365 | 378 | # confused with a list item. |
366 | | - text = re.sub(r'((?:\s|^)[0-9]{1,9})([.)](?:\s|$))', r'\1\\\2', |
367 | | - text) |
| 379 | + text = re_list_items_to_escape.sub(r'\1\\\2', text) |
| 380 | + |
368 | 381 | if self.options['escape_asterisks']: |
369 | 382 | text = text.replace('*', r'\*') |
370 | 383 | if self.options['escape_underscores']: |
|
0 commit comments