Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,12 @@ strip_document
within the document are unaffected.
Defaults to ``STRIP``.

strip_pre
Controls whether leading/trailing blank lines are removed from the contents
of ``<pre>`` elements. Supported values are ``STRIP`` (all leading/trailing
blank lines), ``STRIP_ONE`` (at most one leading and one trailing blank line),
and ``None`` (leave leading/trailing blank lines as-is).
Defaults to ``STRIP``.

beautiful_soup_parser
Specify the Beautiful Soup parser to be used for interpreting HTML markup. Parsers such
as `html5lib`, `lxml` or even a custom parser as long as it is installed on the execution
Expand Down
31 changes: 30 additions & 1 deletion markdownify/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
re_all_whitespace = re.compile(r'[\t \r\n]+')
re_newline_whitespace = re.compile(r'[\t \r\n]*[\r\n][\t \r\n]*')
re_html_heading = re.compile(r'h(\d+)')
# Patterns for trimming blank lines around the text of a <pre> element.
# One leading blank line: optional spaces up to and including the first newline.
re_pre_lstrip1 = re.compile(r'^ *\n')
# One trailing blank line: the final newline plus any spaces that follow it.
# \Z (not $) is required here: $ also matches just before a string-final
# newline, so sub() would match twice on text ending in "\n\n" and remove
# two trailing newlines instead of one.
re_pre_rstrip1 = re.compile(r'\n *\Z')
# All leading blank lines; the indentation of the first non-blank line is kept.
re_pre_lstrip = re.compile(r'^[ \n]*\n')
# All trailing spaces and newlines.
re_pre_rstrip = re.compile(r'[ \n]*$')

# Pattern for creating convert_<tag> function names from tag names
re_make_convert_fn_name = re.compile(r'[\[\]:-]')
Expand Down Expand Up @@ -51,10 +55,25 @@
ASTERISK = '*'
UNDERSCORE = '_'

# Document strip styles
# Document/pre strip styles
LSTRIP = 'lstrip'
RSTRIP = 'rstrip'
STRIP = 'strip'
# Accepted by the strip_pre option: remove one leading/trailing blank line
# from <pre> text rather than all of them.
STRIP_ONE = 'strip_one'


def strip1_pre(text):
    """Strip at most one leading and one trailing blank line from <pre> text.

    A leading blank line is a run of zero or more spaces followed by a
    newline at the very start of the string; a trailing blank line is a
    newline followed by zero or more spaces at the very end. Only spaces and
    newlines are considered; any other text, including the indentation of
    the first remaining line, is preserved.

    Exactly one newline is removed from each end, even when the text ends
    with several consecutive newlines.

    :param text: the text content of a <pre> element
    :returns: the text with up to one blank line removed from each end
    """
    # Leading side: drop the spaces before the first newline and that newline.
    head = text.lstrip(' ')
    if head.startswith('\n'):
        text = head[1:]

    # Trailing side: drop the spaces after the last newline and that newline.
    tail = text.rstrip(' ')
    if tail.endswith('\n'):
        text = tail[:-1]

    return text


def strip_pre(text):
    """Strip all leading and trailing blank lines from <pre> text.

    Leading spaces and newlines are removed up to and including the last
    newline that precedes the first other character, so the indentation of
    the first non-blank line is kept. All trailing spaces and newlines are
    removed. Only spaces and newlines are considered.

    Plain string methods are used so the work stays linear in the length of
    the text, even when it contains long interior runs of spaces or newlines.

    :param text: the text content of a <pre> element
    :returns: the text without leading/trailing blank lines
    """
    # Isolate the leading run of spaces/newlines and cut through its last
    # newline; a run without a newline is indentation and is left alone.
    remainder = text.lstrip(' \n')
    leading = text[:len(text) - len(remainder)]
    last_newline = leading.rfind('\n')
    if last_newline != -1:
        text = text[last_newline + 1:]

    return text.rstrip(' \n')


def chomp(text):
Expand Down Expand Up @@ -168,6 +187,7 @@ class DefaultOptions:
newline_style = SPACES
strip = None
strip_document = STRIP
strip_pre = STRIP
strong_em_symbol = ASTERISK
sub_symbol = ''
sup_symbol = ''
Expand Down Expand Up @@ -652,6 +672,15 @@ def convert_pre(self, el, text, parent_tags):
if self.options['code_language_callback']:
code_language = self.options['code_language_callback'](el) or code_language

if self.options['strip_pre'] == STRIP:
text = strip_pre(text) # remove all leading/trailing newlines
elif self.options['strip_pre'] == STRIP_ONE:
text = strip1_pre(text) # remove one leading/trailing newline
elif self.options['strip_pre'] is None:
pass # leave leading and trailing newlines as-is
else:
raise ValueError('Invalid value for strip_pre: %s' % self.options['strip_pre'])

return '\n\n```%s\n%s\n```\n\n' % (code_language, text)

def convert_q(self, el, text, parent_tags):
Expand Down
9 changes: 8 additions & 1 deletion tests/test_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Test whitelisting/blacklisting of specific tags.

"""
from markdownify import markdownify, LSTRIP, RSTRIP, STRIP
from markdownify import markdownify, LSTRIP, RSTRIP, STRIP, STRIP_ONE
from .utils import md


Expand Down Expand Up @@ -32,3 +32,10 @@ def test_strip_document():
assert markdownify("<p>Hello</p>", strip_document=RSTRIP) == "\n\nHello"
assert markdownify("<p>Hello</p>", strip_document=STRIP) == "Hello"
assert markdownify("<p>Hello</p>", strip_document=None) == "\n\nHello\n\n"


def test_strip_pre():
    """Check the <pre> blank-line handling for each strip_pre setting."""
    html = "<pre> \n \n Hello \n \n </pre>"

    # With no option given the default applies.
    assert markdownify(html) == "```\n Hello\n```"

    # Explicit settings, from most to least aggressive.
    expected_by_mode = (
        (STRIP, "```\n Hello\n```"),
        (STRIP_ONE, "```\n \n Hello \n \n```"),
        (None, "```\n \n \n Hello \n \n \n```"),
    )
    for mode, expected in expected_by_mode:
        assert markdownify(html, strip_pre=mode) == expected
2 changes: 1 addition & 1 deletion tests/test_conversions.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,4 +370,4 @@ def test_spaces():
assert md('test <blockquote> text </blockquote> after') == 'test\n> text\n\nafter'
assert md(' <ol> <li> x </li> <li> y </li> </ol> ') == '\n\n1. x\n2. y\n'
assert md(' <ul> <li> x </li> <li> y </li> </ol> ') == '\n\n* x\n* y\n'
assert md('test <pre> foo </pre> bar') == 'test\n\n```\n foo \n```\n\nbar'
assert md('test <pre> foo </pre> bar') == 'test\n\n```\n foo\n```\n\nbar'