Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 64 additions & 13 deletions src/pack_liascript_course/pack_liascript_course.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,33 +133,84 @@ def fetch_bytes(source: str) -> bytes:
re.compile(r'<link\b[^>]+\bhref=["\']([^"\']+)["\']', re.IGNORECASE),
# HTML script src
re.compile(r'<script\b[^>]+\bsrc=["\']([^"\']+)["\']', re.IGNORECASE),
# LiaScript @import
re.compile(r'@import\s+["\']([^"\']+)["\']', re.IGNORECASE),
# HTML audio/video src
re.compile(r'<(?:audio|video|source)\b[^>]+\bsrc=["\']([^"\']+)["\']', re.IGNORECASE),
]

# Prefixes identifying references that are not relative file paths (remote
# URLs, protocol-relative URLs, in-page anchors, mailto: and data: URIs).
# extract_relative_links() skips any path that starts with one of them.
_ABSOLUTE_PREFIXES = ("http://", "https://", "//", "#", "mailto:", "data:")

# Regex that matches a YAML frontmatter block at the very start of a document.
_YAML_FRONTMATTER_RE = re.compile(r'^---[ \t]*\n(.*?\n)(?:---|\.\.\.)[ \t]*\n', re.DOTALL)

# LiaScript YAML header fields whose values may be relative file paths.
_YAML_LINK_FIELDS = frozenset({"import", "link", "script", "logo"})


def _extract_yaml_frontmatter_links(content: str) -> list[str]:
"""Return file references found in LiaScript YAML header fields.

Handles both scalar values (``field: path``) and YAML list items
(``- path``) for the fields ``import``, ``link``, ``script``, and
``logo``.
"""
match = _YAML_FRONTMATTER_RE.match(content)
if not match:
return []

frontmatter = match.group(1)
result: list[str] = []
current_field: str | None = None

for line in frontmatter.splitlines():
# A top-level field definition: "key: value" or "key:"
field_match = re.match(r'^([A-Za-z_]\w*)\s*:\s*(.*)', line)
if field_match:
field_name = field_match.group(1).lower()
field_value = field_match.group(2).strip().strip("\"'")

if field_name in _YAML_LINK_FIELDS:
current_field = field_name
if field_value: # Inline scalar: "field: value"
result.append(field_value)
else:
current_field = None
elif current_field is not None:
# Indented list item under the current field: " - value"
list_match = re.match(r'^\s+-\s+(.*)', line)
if list_match:
value = list_match.group(1).strip().strip("\"'")
if value:
result.append(value)
elif line and not line[0].isspace():
# Non-indented non-field line: we left the field's block.
current_field = None

return result


def extract_relative_links(content: str) -> list[str]:
    """Return a deduplicated list of relative asset paths referenced in *content*.

    References are collected first from the YAML frontmatter and then from
    the document body (every pattern in ``_LINK_PATTERNS``), so frontmatter
    entries come first in the result.  Each reference has its query string
    and fragment removed; empty results and references starting with one of
    ``_ABSOLUTE_PREFIXES`` are skipped.  The first occurrence of a path
    determines its position in the returned list.
    """
    seen: set[str] = set()
    result: list[str] = []

    def _add(raw: str) -> None:
        """Normalise *raw* and record it unless empty, absolute or already seen."""
        # Drop the query string and fragment; only the file path matters.
        path = raw.strip().split("?")[0].split("#")[0].strip()
        if not path or path.startswith(_ABSOLUTE_PREFIXES):
            return
        if path not in seen:
            seen.add(path)
            result.append(path)

    # Links declared in the YAML frontmatter take priority.
    for link in _extract_yaml_frontmatter_links(content):
        _add(link)

    # Links referenced in the document body via Markdown/HTML syntax.
    for pattern in _LINK_PATTERNS:
        for match in pattern.finditer(content):
            _add(match.group(1))

    return result

Expand Down
66 changes: 62 additions & 4 deletions tests/test_pack_liascript_course.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,16 +122,74 @@ def test_html_script(self):
links = packer.extract_relative_links(content)
assert "js/quiz.js" in links

def test_liascript_import(self):
content = "@import 'macros.md'"
# ------------------------------------------------------------------
# YAML frontmatter tests
# ------------------------------------------------------------------

def test_yaml_import_scalar(self):
    """An inline ``import: path`` frontmatter value is collected."""
    found = packer.extract_relative_links("---\nimport: macros.md\n---\n# Body")
    assert "macros.md" in found

def test_yaml_import_list(self):
    """Every item of an ``import:`` list in the frontmatter is collected."""
    document = "---\nimport:\n - macros.md\n - helpers.md\n---\n"
    found = packer.extract_relative_links(document)
    for expected in ("macros.md", "helpers.md"):
        assert expected in found

def test_yaml_link_scalar(self):
    """An inline ``link: path`` frontmatter value is collected."""
    found = packer.extract_relative_links("---\nlink: styles/custom.css\n---\n")
    assert "styles/custom.css" in found

def test_yaml_link_list(self):
    """Every item of a ``link:`` list in the frontmatter is collected."""
    document = "---\nlink:\n - styles/a.css\n - styles/b.css\n---\n"
    found = packer.extract_relative_links(document)
    for expected in ("styles/a.css", "styles/b.css"):
        assert expected in found

def test_yaml_script_scalar(self):
    """An inline ``script: path`` frontmatter value is collected."""
    found = packer.extract_relative_links("---\nscript: js/init.js\n---\n")
    assert "js/init.js" in found

def test_yaml_logo(self):
    """An inline ``logo: path`` frontmatter value is collected."""
    found = packer.extract_relative_links("---\nlogo: images/banner.jpg\n---\n")
    assert "images/banner.jpg" in found

def test_liascript_import_double_quotes(self):
content = '@import "https://example.com/remote.md"'
def test_yaml_skips_absolute_import(self):
    """A remote URL given as an ``import:`` value yields no links."""
    document = "---\nimport: https://example.com/remote.md\n---\n"
    found = packer.extract_relative_links(document)
    assert not found

def test_yaml_skips_absolute_import_in_list(self):
    """Within an ``import:`` list, local paths are kept and remote URLs dropped."""
    document = "---\nimport:\n - local.md\n - https://example.com/remote.md\n---\n"
    found = packer.extract_relative_links(document)
    assert "local.md" in found
    assert "https://example.com/remote.md" not in found

def test_yaml_ignores_other_fields(self):
    """Frontmatter fields other than the link-bearing ones produce no links."""
    document = "---\ntitle: My Course\nauthor: Someone\n---\n"
    found = packer.extract_relative_links(document)
    assert not found

def test_yaml_and_body_deduplication(self):
    """A path named in both the frontmatter and the body is reported once."""
    document = "---\nlink: styles/custom.css\n---\n<link href=\"styles/custom.css\">"
    found = packer.extract_relative_links(document)
    # A count of exactly one implies both presence and deduplication.
    assert found.count("styles/custom.css") == 1

def test_no_yaml_frontmatter(self):
    """Body links are still collected when the document has no frontmatter."""
    document = "# Just a plain markdown file\n![img](image.png)"
    found = packer.extract_relative_links(document)
    assert "image.png" in found

def test_yaml_quoted_values(self):
    """Quotes around a frontmatter value are not part of the collected path."""
    found = packer.extract_relative_links('---\nlogo: "images/logo.png"\n---\n')
    assert "images/logo.png" in found

def test_skips_absolute_urls(self):
content = "![logo](https://example.com/logo.png)"
links = packer.extract_relative_links(content)
Expand Down