"""
Model for extracting ext-link elements from XML documents.

<ext-link> is used for marking external links/hyperlinks in scientific
articles. It ensures accessibility and interoperability of external
references.

Example:
    <ext-link ext-link-type="uri" xlink:href="https://www.scielo.br/">
        SciELO Brasil
    </ext-link>
"""
from packtools.sps.models.article_and_subarticles import Fulltext
from packtools.sps.utils.xml_utils import (
    node_plain_text,
    tostring,
    remove_namespaces,
)


class ExtLink(Fulltext):
    """
    Extracts ext-link elements from one article or sub-article node.

    Only the ext-links that belong to this node's own front, front-stub,
    body and back sections are reported; nested sub-articles are exposed
    separately through ``fulltexts``.
    """

    # Local attribute names (namespace already removed) that are exposed
    # under a snake_case key; any other attribute keeps its local name.
    _ATTRIBUTE_KEYS = {
        "href": "xlink_href",
        "title": "xlink_title",
        "ext-link-type": "ext_link_type",
    }

    # Keys that are always present in the extracted data (None when absent).
    _MANDATORY_KEYS = ("ext_link_type", "xlink_href", "xlink_title")

    @property
    def parent_data(self):
        """Return the parent context data attached to every ext-link."""
        # NOTE(review): attribs_parent_prefixed is not defined in this module;
        # presumably inherited from Fulltext - confirm against that class.
        return self.attribs_parent_prefixed

    @classmethod
    def _attributes(cls, ext_link):
        """Map the attributes of an ext-link node to a plain dict."""
        data = {}
        for key, value in ext_link.attrib.items():
            # "{http://www.w3.org/1999/xlink}href" -> "href"; names without a
            # namespace are returned unchanged by the split.
            local_name = key.split("}")[-1]
            data[cls._ATTRIBUTE_KEYS.get(local_name, local_name)] = value

        for key in cls._MANDATORY_KEYS:
            data.setdefault(key, None)
        return data

    @property
    def ext_links(self):
        """
        Iterate over the ext-link elements of this article/sub-article.

        Yields
        ------
        dict
            One dictionary per ext-link containing:
            - ext_link_type: value of @ext-link-type (None when absent)
            - xlink_href: value of @xlink:href (None when absent)
            - xlink_title: value of @xlink:title (None when absent)
            - any other attribute, keyed by its local name
            - text: plain text content of the element ("" when empty)
            - the items of ``parent_data``

        Example of a yielded item:
            {
                'ext_link_type': 'uri',
                'xlink_href': 'https://www.scielo.br/',
                'xlink_title': 'SciELO Platform',
                'text': 'SciELO Brasil',
                'parent': 'article',
                'parent_id': None,
                'parent_lang': 'en',
                'parent_article_type': 'research-article'
            }
        """
        # Restrict the search to this node's own sections so that ext-links
        # of nested sub-articles are not reported with the wrong parent.
        sections = self.node.xpath("./front | ./front-stub | ./body | ./back")

        for section in sections:
            for ext_link in section.xpath(".//ext-link"):
                data = self._attributes(ext_link)
                data["text"] = node_plain_text(ext_link) or ""
                data.update(self.parent_data)
                yield data

    @property
    def fulltexts(self):
        """Iterate over the direct sub-articles, wrapped as ``ExtLink``."""
        for node in self.node.xpath("sub-article"):
            yield ExtLink(node)


class ArticleExtLinks:
    """
    Main class for extracting all ext-links from an XML document.

    Processes the main article and every sub-article, at any nesting depth.
    """

    def __init__(self, xmltree):
        """
        Initialize with XML tree.

        Parameters
        ----------
        xmltree : lxml.etree._Element
            The root element of the XML document
        """
        self.xmltree = xmltree
        self._ext_link = ExtLink(xmltree)

    @classmethod
    def _walk(cls, fulltext):
        """Yield ``fulltext`` followed by all of its descendants, pre-order."""
        yield fulltext
        for child in fulltext.fulltexts:
            yield from cls._walk(child)

    @property
    def ext_links(self):
        """
        Extract all ext-links from the main article and its sub-articles.

        The traversal is pre-order (a node's own ext-links come before those
        of its sub-articles), and it follows sub-articles to any depth rather
        than stopping at the second level.

        Returns
        -------
        list
            List of all ext-link dictionaries from article and sub-articles
        """
        return [
            ext_link
            for fulltext in self._walk(self._ext_link)
            for ext_link in fulltext.ext_links
        ]
"""
Validation for ext-link elements according to SPS 1.10 specification.

Implements validations for external link elements to ensure:
- Mandatory attributes are present (@ext-link-type, @xlink:href)
- URL format is valid (starts with http:// or https://)
- ext-link-type values are in allowed list
- Link text is descriptive (accessibility)
- @xlink:title is present when text is generic or URL

Reference: https://docs.google.com/document/d/1GTv4Inc2LS_AXY-ToHT3HmO66UT0VAHWJNOIqzBNSgA/edit?tab=t.0#heading=h.n2z5yrri2aba
"""
import re
from packtools.sps.models.ext_link import ArticleExtLinks
from packtools.sps.validation.utils import build_response


class ExtLinkValidation:
    """
    Validates ext-link elements in scientific article XML.

    Validates presence of mandatory attributes, URL format, allowed values,
    and accessibility requirements (descriptive text).
    """

    # Generic phrases to avoid (matched as case-insensitive substrings, so
    # "more info" also covers "more information").
    GENERIC_PHRASES = [
        "leia mais",
        "clique aqui",
        "acesse",
        "veja mais",
        "saiba mais",
        "click here",
        "read more",
        "see more",
        "learn more",
        "more info",
        "mais informações",
    ]

    # Allowed values for @ext-link-type
    ALLOWED_EXT_LINK_TYPES = [
        "uri",
        "doi",
        "pmid",
        "pmcid",
        "clinical-trial",
    ]

    # Error level used by each validation when the caller does not override it.
    DEFAULT_PARAMS = {
        "ext_link_type_error_level": "CRITICAL",
        "xlink_href_error_level": "CRITICAL",
        "xlink_href_format_error_level": "ERROR",
        "ext_link_type_value_error_level": "ERROR",
        "descriptive_text_error_level": "WARNING",
        "xlink_title_error_level": "WARNING",
    }

    # Number of characters of the link text quoted in advice messages.
    TEXT_PREVIEW_LENGTH = 50

    # Text that looks like (part of) a URL: starts with a scheme or "www.",
    # or has a "." / "/" between word characters ("scielo.br", "10.1590/x").
    _URL_LIKE_TEXT = re.compile(r"^(?:https?://|www\.)|\w[./]\w", re.IGNORECASE)

    def __init__(self, xmltree, params=None):
        """
        Initialize validation with XML tree and optional parameters.

        Parameters
        ----------
        xmltree : lxml.etree._Element
            The root element of the XML document
        params : dict, optional
            Configuration parameters including error levels. The dict is
            copied; the caller's object is never modified.
        """
        # Merge into a fresh dict so the defaults do not leak into ``params``.
        self.params = dict(self.DEFAULT_PARAMS)
        self.params.update(params or {})

        self.xmltree = xmltree
        self.ext_links_model = ArticleExtLinks(xmltree)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _error_level(self, error_level, param_name):
        """Return the explicit ``error_level`` or the configured default."""
        return error_level or self.params.get(
            param_name, self.DEFAULT_PARAMS[param_name]
        )

    def _text_preview(self, ext_link):
        """Return the first characters of the link text, for advice messages."""
        return (ext_link.get("text") or "")[: self.TEXT_PREVIEW_LENGTH]

    def _is_generic_text(self, text):
        """Tell whether ``text`` contains one of ``GENERIC_PHRASES``."""
        lowered = text.lower()
        return any(phrase in lowered for phrase in self.GENERIC_PHRASES)

    def _is_url_text(self, text, xlink_href):
        """
        Tell whether the link text is the URL itself (or a piece of it).

        A text that merely happens to be a substring of the URL (e.g. "br"
        for "https://www.scielo.br/") is not URL text, so the substring
        direction additionally requires the text to look like a URL.
        """
        if not xlink_href:
            return False
        if xlink_href in text:
            return True
        return text in xlink_href and bool(self._URL_LIKE_TEXT.search(text))

    @staticmethod
    def _parent_info(ext_link):
        """Extract the parent context expected by ``build_response``."""
        return {
            "parent": ext_link.get("parent"),
            "parent_id": ext_link.get("parent_id"),
            "parent_article_type": ext_link.get("parent_article_type"),
            "parent_lang": ext_link.get("parent_lang"),
        }

    def _response(
        self,
        ext_link,
        title,
        sub_item,
        validation_type,
        is_valid,
        expected,
        obtained,
        advice_text,
        advice_params,
        error_level,
    ):
        """Build the response for one ext-link; advice is derived from the template."""
        return build_response(
            title=title,
            parent=self._parent_info(ext_link),
            item="ext-link",
            sub_item=sub_item,
            validation_type=validation_type,
            is_valid=is_valid,
            expected=expected,
            obtained=obtained,
            # Single source of truth: the rendered advice always matches
            # advice_text / advice_params.
            advice=advice_text.format(**advice_params),
            data=ext_link,
            error_level=error_level,
            advice_text=advice_text,
            advice_params=advice_params,
        )

    # ------------------------------------------------------------------
    # Validations
    # ------------------------------------------------------------------

    def validate_ext_link_type_presence(self, error_level=None):
        """
        Validate presence of @ext-link-type attribute (CRITICAL).

        SPS Rule: @ext-link-type is mandatory in all <ext-link> elements.

        Parameters
        ----------
        error_level : str, optional
            Override default error level (default: "CRITICAL")

        Yields
        ------
        dict
            Validation result for each ext-link
        """
        error_level = self._error_level(error_level, "ext_link_type_error_level")
        allowed_values = ", ".join(self.ALLOWED_EXT_LINK_TYPES)

        for ext_link in self.ext_links_model.ext_links:
            ext_link_type = ext_link.get("ext_link_type")

            yield self._response(
                ext_link,
                title="@ext-link-type attribute",
                sub_item="@ext-link-type",
                validation_type="exist",
                is_valid=bool(ext_link_type),
                expected="@ext-link-type attribute present",
                obtained=ext_link_type,
                advice_text=(
                    'Add @ext-link-type attribute to <ext-link> with text "{text}". '
                    "Valid values: {allowed_values}"
                ),
                advice_params={
                    "text": self._text_preview(ext_link),
                    "allowed_values": allowed_values,
                },
                error_level=error_level,
            )

    def validate_xlink_href_presence(self, error_level=None):
        """
        Validate presence of @xlink:href attribute (CRITICAL).

        SPS Rule: @xlink:href is mandatory in all <ext-link> elements.

        Parameters
        ----------
        error_level : str, optional
            Override default error level (default: "CRITICAL")

        Yields
        ------
        dict
            Validation result for each ext-link
        """
        error_level = self._error_level(error_level, "xlink_href_error_level")

        for ext_link in self.ext_links_model.ext_links:
            xlink_href = ext_link.get("xlink_href")

            yield self._response(
                ext_link,
                title="@xlink:href attribute",
                sub_item="@xlink:href",
                validation_type="exist",
                is_valid=bool(xlink_href),
                expected="@xlink:href attribute present",
                obtained=xlink_href,
                advice_text='Add @xlink:href attribute to <ext-link> with text "{text}"',
                advice_params={"text": self._text_preview(ext_link)},
                error_level=error_level,
            )

    def validate_xlink_href_format(self, error_level=None):
        """
        Validate @xlink:href URL format (ERROR).

        SPS Rule: @xlink:href must be a complete URL starting with http:// or https://

        Parameters
        ----------
        error_level : str, optional
            Override default error level (default: "ERROR")

        Yields
        ------
        dict
            Validation result for each ext-link with xlink:href
        """
        error_level = self._error_level(error_level, "xlink_href_format_error_level")

        for ext_link in self.ext_links_model.ext_links:
            xlink_href = ext_link.get("xlink_href")

            # Missing @xlink:href is reported by validate_xlink_href_presence.
            if not xlink_href:
                continue

            yield self._response(
                ext_link,
                title="@xlink:href URL format",
                sub_item="@xlink:href",
                validation_type="format",
                is_valid=bool(re.match(r"^https?://", xlink_href, re.IGNORECASE)),
                expected="URL starting with http:// or https://",
                obtained=xlink_href,
                advice_text='URL in @xlink:href="{xlink_href}" must start with http:// or https://',
                advice_params={"xlink_href": xlink_href},
                error_level=error_level,
            )

    def validate_ext_link_type_value(self, error_level=None):
        """
        Validate @ext-link-type value is in allowed list (ERROR).

        SPS Rule: @ext-link-type must be one of: uri, doi, pmid, pmcid, clinical-trial

        Parameters
        ----------
        error_level : str, optional
            Override default error level (default: "ERROR")

        Yields
        ------
        dict
            Validation result for each ext-link with ext-link-type
        """
        error_level = self._error_level(error_level, "ext_link_type_value_error_level")
        allowed_values = ", ".join(self.ALLOWED_EXT_LINK_TYPES)

        for ext_link in self.ext_links_model.ext_links:
            ext_link_type = ext_link.get("ext_link_type")

            # Missing @ext-link-type is reported by validate_ext_link_type_presence.
            if not ext_link_type:
                continue

            yield self._response(
                ext_link,
                title="@ext-link-type value",
                sub_item="@ext-link-type",
                validation_type="value in list",
                is_valid=ext_link_type in self.ALLOWED_EXT_LINK_TYPES,
                expected=allowed_values,
                obtained=ext_link_type,
                advice_text='Replace @ext-link-type="{ext_link_type}" with one of: {allowed_values}',
                advice_params={
                    "ext_link_type": ext_link_type,
                    "allowed_values": allowed_values,
                },
                error_level=error_level,
            )

    def validate_descriptive_text(self, error_level=None):
        """
        Validate link text is descriptive, not generic (WARNING).

        SPS Rule: Text should not be generic phrases like "click here", "read more", etc.
        Validation is case-insensitive. Only ext-links with generic text
        produce a result; descriptive or empty text yields nothing.

        Parameters
        ----------
        error_level : str, optional
            Override default error level (default: "WARNING")

        Yields
        ------
        dict
            Validation result for each ext-link with generic text
        """
        error_level = self._error_level(error_level, "descriptive_text_error_level")

        for ext_link in self.ext_links_model.ext_links:
            text = (ext_link.get("text") or "").strip()

            # Empty text is skipped; descriptive text needs no report.
            if not text or not self._is_generic_text(text):
                continue

            yield self._response(
                ext_link,
                title="descriptive link text",
                sub_item="text()",
                validation_type="value",
                is_valid=False,
                expected="descriptive text (not generic)",
                obtained=text,
                advice_text=(
                    'Replace generic text "{text}" in <ext-link> with descriptive text, '
                    "or add @xlink:title attribute"
                ),
                advice_params={"text": text},
                error_level=error_level,
            )

    def validate_xlink_title_when_generic(self, error_level=None):
        """
        Validate @xlink:title presence when text is generic or URL (WARNING).

        SPS Rule: When text is generic or is the URL itself, @xlink:title should
        be present with a description of the link destination. Only ext-links
        that break the rule produce a result.

        Parameters
        ----------
        error_level : str, optional
            Override default error level (default: "WARNING")

        Yields
        ------
        dict
            Validation result for ext-links with generic/URL text and no
            @xlink:title
        """
        error_level = self._error_level(error_level, "xlink_title_error_level")

        for ext_link in self.ext_links_model.ext_links:
            text = (ext_link.get("text") or "").strip()
            if not text:
                continue

            is_generic = self._is_generic_text(text)
            if not (is_generic or self._is_url_text(text, ext_link.get("xlink_href"))):
                continue

            xlink_title = ext_link.get("xlink_title")
            if xlink_title:
                continue

            yield self._response(
                ext_link,
                title="@xlink:title for generic/URL text",
                sub_item="@xlink:title",
                validation_type="exist",
                is_valid=False,
                expected="@xlink:title attribute when text is generic or URL",
                obtained=xlink_title,
                advice_text=(
                    'Add @xlink:title attribute to <ext-link> with {reason} text "{text}" '
                    "to describe link destination"
                ),
                advice_params={
                    "reason": "generic" if is_generic else "URL",
                    "text": text,
                },
                error_level=error_level,
            )
"""
Unit tests for ext-link validation according to SPS 1.10.

Tests cover mandatory attributes, URL format, allowed values,
and accessibility requirements (descriptive text).
"""
import unittest
from lxml import etree

from packtools.sps.validation.ext_link import ExtLinkValidation


# NOTE(review): the XML fixtures below were rebuilt from each test's docstring
# and assertions; confirm the attribute values against the intended fixtures.
ARTICLE_TEMPLATE = (
    '<article xmlns:xlink="http://www.w3.org/1999/xlink" '
    'article-type="research-article" xml:lang="en">'
    "<body>{paragraphs}</body>"
    "</article>"
)

SCIELO_LINK = (
    '<ext-link ext-link-type="uri" xlink:href="https://www.scielo.br/">'
    "SciELO Brasil</ext-link>"
)


def filter_results(results):
    """Filter out None values from validator results."""
    return [r for r in results if r is not None]


def build_validator(*ext_links):
    """Return a validator for an article with one <p> per given ext-link."""
    paragraphs = "".join("<p>{}</p>".format(link) for link in ext_links)
    xmltree = etree.fromstring(ARTICLE_TEMPLATE.format(paragraphs=paragraphs))
    return ExtLinkValidation(xmltree)


class TestExtLinkValidation(unittest.TestCase):
    """Tests for ext-link element validations."""

    # ========== TESTS: @ext-link-type PRESENCE (CRITICAL) ==========

    def test_validate_ext_link_type_presence_valid(self):
        """Test valid ext-link with @ext-link-type attribute."""
        validator = build_validator(SCIELO_LINK)

        results = filter_results(validator.validate_ext_link_type_presence())

        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["response"], "OK")

    def test_validate_ext_link_type_presence_missing(self):
        """Test ext-link without @ext-link-type attribute (CRITICAL error)."""
        validator = build_validator(
            '<ext-link xlink:href="https://www.scielo.br/">SciELO Brasil</ext-link>'
        )

        results = filter_results(validator.validate_ext_link_type_presence())

        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["response"], "CRITICAL")
        self.assertIn("Add @ext-link-type attribute", results[0]["advice"])
        self.assertIsNotNone(results[0]["adv_text"])
        self.assertIsNotNone(results[0]["adv_params"])
        self.assertIn("text", results[0]["adv_params"])

    # ========== TESTS: @xlink:href PRESENCE (CRITICAL) ==========

    def test_validate_xlink_href_presence_valid(self):
        """Test valid ext-link with @xlink:href attribute."""
        validator = build_validator(SCIELO_LINK)

        results = filter_results(validator.validate_xlink_href_presence())

        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["response"], "OK")

    def test_validate_xlink_href_presence_missing(self):
        """Test ext-link without @xlink:href attribute (CRITICAL error)."""
        validator = build_validator(
            '<ext-link ext-link-type="uri">SciELO Brasil</ext-link>'
        )

        results = filter_results(validator.validate_xlink_href_presence())

        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["response"], "CRITICAL")
        self.assertIn("Add @xlink:href attribute", results[0]["advice"])
        self.assertIsNotNone(results[0]["adv_text"])
        self.assertIsNotNone(results[0]["adv_params"])

    # ========== TESTS: @xlink:href FORMAT (ERROR) ==========

    def test_validate_xlink_href_format_valid_http(self):
        """Test @xlink:href with valid http:// URL."""
        validator = build_validator(
            '<ext-link ext-link-type="uri" xlink:href="http://www.scielo.br/">'
            "SciELO Brasil</ext-link>"
        )

        results = filter_results(validator.validate_xlink_href_format())

        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["response"], "OK")

    def test_validate_xlink_href_format_valid_https(self):
        """Test @xlink:href with valid https:// URL."""
        validator = build_validator(SCIELO_LINK)

        results = filter_results(validator.validate_xlink_href_format())

        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["response"], "OK")

    def test_validate_xlink_href_format_invalid_no_protocol(self):
        """Test @xlink:href without protocol (ERROR)."""
        validator = build_validator(
            '<ext-link ext-link-type="uri" xlink:href="www.scielo.br">'
            "SciELO Brasil</ext-link>"
        )

        results = filter_results(validator.validate_xlink_href_format())

        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["response"], "ERROR")
        self.assertIn("must start with http:// or https://", results[0]["advice"])
        self.assertIsNotNone(results[0]["adv_text"])
        self.assertIsNotNone(results[0]["adv_params"])

    def test_validate_xlink_href_format_skips_missing(self):
        """Test that format validation skips ext-links without @xlink:href."""
        validator = build_validator(
            '<ext-link ext-link-type="uri">SciELO Brasil</ext-link>'
        )

        results = filter_results(validator.validate_xlink_href_format())

        # Should not validate format if href is missing
        self.assertEqual(len(results), 0)

    # ========== TESTS: @ext-link-type VALUE (ERROR) ==========

    def test_validate_ext_link_type_value_uri(self):
        """Test @ext-link-type="uri" (valid)."""
        validator = build_validator(SCIELO_LINK)

        results = filter_results(validator.validate_ext_link_type_value())

        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["response"], "OK")

    def test_validate_ext_link_type_value_doi(self):
        """Test @ext-link-type="doi" (valid)."""
        validator = build_validator(
            '<ext-link ext-link-type="doi" '
            'xlink:href="https://doi.org/10.1590/example">DOI Link</ext-link>'
        )

        results = filter_results(validator.validate_ext_link_type_value())

        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["response"], "OK")

    def test_validate_ext_link_type_value_pmid(self):
        """Test @ext-link-type="pmid" (valid)."""
        validator = build_validator(
            '<ext-link ext-link-type="pmid" '
            'xlink:href="https://pubmed.ncbi.nlm.nih.gov/12345678/">'
            "PubMed Link</ext-link>"
        )

        results = filter_results(validator.validate_ext_link_type_value())

        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["response"], "OK")

    def test_validate_ext_link_type_value_pmcid(self):
        """Test @ext-link-type="pmcid" (valid)."""
        validator = build_validator(
            '<ext-link ext-link-type="pmcid" '
            'xlink:href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1234567/">'
            "PMC Link</ext-link>"
        )

        results = filter_results(validator.validate_ext_link_type_value())

        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["response"], "OK")

    def test_validate_ext_link_type_value_clinical_trial(self):
        """Test @ext-link-type="clinical-trial" (valid)."""
        validator = build_validator(
            '<ext-link ext-link-type="clinical-trial" '
            'xlink:href="https://clinicaltrials.gov/ct2/show/NCT00000000">'
            "Clinical Trial</ext-link>"
        )

        results = filter_results(validator.validate_ext_link_type_value())

        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["response"], "OK")

    def test_validate_ext_link_type_value_invalid(self):
        """Test @ext-link-type with invalid value (ERROR)."""
        validator = build_validator(
            '<ext-link ext-link-type="invalid" xlink:href="https://www.scielo.br/">'
            "SciELO Brasil</ext-link>"
        )

        results = filter_results(validator.validate_ext_link_type_value())

        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["response"], "ERROR")
        self.assertIn("Replace @ext-link-type", results[0]["advice"])
        self.assertIsNotNone(results[0]["adv_text"])
        self.assertIsNotNone(results[0]["adv_params"])
        self.assertIn("ext_link_type", results[0]["adv_params"])

    def test_validate_ext_link_type_value_skips_missing(self):
        """Test that value validation skips ext-links without @ext-link-type."""
        validator = build_validator(
            '<ext-link xlink:href="https://www.scielo.br/">SciELO Brasil</ext-link>'
        )

        results = filter_results(validator.validate_ext_link_type_value())

        # Should not validate value if type is missing
        self.assertEqual(len(results), 0)

    # ========== TESTS: DESCRIPTIVE TEXT (WARNING) ==========

    def test_validate_descriptive_text_valid(self):
        """Test ext-link with descriptive text (OK)."""
        validator = build_validator(SCIELO_LINK)

        results = filter_results(validator.validate_descriptive_text())

        # Descriptive text should not yield any results (only invalid cases)
        self.assertEqual(len(results), 0)

    def test_validate_descriptive_text_generic_leia_mais(self):
        """Test ext-link with generic text 'leia mais' (WARNING)."""
        validator = build_validator(
            '<ext-link ext-link-type="uri" xlink:href="https://www.scielo.br/">'
            "Leia mais</ext-link>"
        )

        results = filter_results(validator.validate_descriptive_text())

        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["response"], "WARNING")
        self.assertIn("Replace generic text", results[0]["advice"])
        self.assertIsNotNone(results[0]["adv_text"])
        self.assertIsNotNone(results[0]["adv_params"])

    def test_validate_descriptive_text_generic_click_here(self):
        """Test ext-link with generic text 'click here' (WARNING)."""
        validator = build_validator(
            '<ext-link ext-link-type="uri" xlink:href="https://www.scielo.br/">'
            "click here</ext-link>"
        )

        results = filter_results(validator.validate_descriptive_text())

        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["response"], "WARNING")
        self.assertIn("Replace generic text", results[0]["advice"])

    def test_validate_descriptive_text_case_insensitive(self):
        """Test generic text detection is case-insensitive."""
        validator = build_validator(
            '<ext-link ext-link-type="uri" xlink:href="https://www.scielo.br/">'
            "CLIQUE AQUI</ext-link>"
        )

        results = filter_results(validator.validate_descriptive_text())

        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["response"], "WARNING")

    def test_validate_descriptive_text_empty(self):
        """Test ext-link with empty text (skips validation)."""
        validator = build_validator(
            '<ext-link ext-link-type="uri" xlink:href="https://www.scielo.br/">'
            "</ext-link>"
        )

        results = filter_results(validator.validate_descriptive_text())

        # Empty text should be skipped
        self.assertEqual(len(results), 0)

    # ========== TESTS: @xlink:title WHEN GENERIC/URL (WARNING) ==========

    def test_validate_xlink_title_when_generic_text_with_title(self):
        """Test generic text with @xlink:title present (OK)."""
        validator = build_validator(
            '<ext-link ext-link-type="uri" xlink:href="https://www.scielo.br/" '
            'xlink:title="SciELO Platform">Leia mais</ext-link>'
        )

        results = filter_results(validator.validate_xlink_title_when_generic())

        # With xlink:title, should not yield results
        self.assertEqual(len(results), 0)

    def test_validate_xlink_title_when_generic_text_without_title(self):
        """Test generic text without @xlink:title (WARNING)."""
        validator = build_validator(
            '<ext-link ext-link-type="uri" xlink:href="https://www.scielo.br/">'
            "Leia mais</ext-link>"
        )

        results = filter_results(validator.validate_xlink_title_when_generic())

        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["response"], "WARNING")
        self.assertIn("Add @xlink:title attribute", results[0]["advice"])
        self.assertIsNotNone(results[0]["adv_text"])
        self.assertIsNotNone(results[0]["adv_params"])
        self.assertIn("reason", results[0]["adv_params"])

    def test_validate_xlink_title_when_url_text_with_title(self):
        """Test URL as text with @xlink:title present (OK)."""
        validator = build_validator(
            '<ext-link ext-link-type="uri" xlink:href="https://example.com/path" '
            'xlink:title="Example page">https://example.com/path</ext-link>'
        )

        results = filter_results(validator.validate_xlink_title_when_generic())

        # With xlink:title, should not yield results
        self.assertEqual(len(results), 0)

    def test_validate_xlink_title_when_url_text_without_title(self):
        """Test URL as text without @xlink:title (WARNING)."""
        validator = build_validator(
            '<ext-link ext-link-type="uri" xlink:href="https://example.com/path">'
            "https://example.com/path</ext-link>"
        )

        results = filter_results(validator.validate_xlink_title_when_generic())

        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["response"], "WARNING")
        self.assertIn("Add @xlink:title attribute", results[0]["advice"])

    def test_validate_xlink_title_descriptive_text(self):
        """Test descriptive text does not require @xlink:title."""
        validator = build_validator(SCIELO_LINK)

        results = filter_results(validator.validate_xlink_title_when_generic())

        # Descriptive text should not yield results
        self.assertEqual(len(results), 0)

    # ========== TESTS: MULTIPLE EXT-LINKS ==========

    def test_validate_multiple_ext_links(self):
        """Test validation of multiple ext-links."""
        validator = build_validator(
            # First ext-link is valid
            SCIELO_LINK,
            # Second ext-link: missing @ext-link-type, invalid URL format
            '<ext-link xlink:href="www.example.com">Example</ext-link>',
            # Third ext-link: invalid @ext-link-type value, generic text
            '<ext-link ext-link-type="invalid" xlink:href="https://example.com">'
            "click here</ext-link>",
        )

        # Check all validations
        type_presence_results = filter_results(validator.validate_ext_link_type_presence())
        href_presence_results = filter_results(validator.validate_xlink_href_presence())
        href_format_results = filter_results(validator.validate_xlink_href_format())
        type_value_results = filter_results(validator.validate_ext_link_type_value())
        text_results = filter_results(validator.validate_descriptive_text())

        self.assertEqual(len(type_presence_results), 3)  # All checked
        self.assertEqual(len([r for r in type_presence_results if r["response"] != "OK"]), 1)  # 1 error

        self.assertEqual(len(href_presence_results), 3)  # All checked
        self.assertEqual(len([r for r in href_presence_results if r["response"] != "OK"]), 0)  # All have href

        self.assertEqual(len(href_format_results), 3)  # All 3 have href
        self.assertEqual(len([r for r in href_format_results if r["response"] != "OK"]), 1)  # 1 error

        self.assertEqual(len(type_value_results), 2)  # Only 2 have type
        self.assertEqual(len([r for r in type_value_results if r["response"] != "OK"]), 1)  # 1 error

        self.assertEqual(len(text_results), 1)  # Only 1 generic text
        self.assertEqual(text_results[0]["response"], "WARNING")


if __name__ == "__main__":
    unittest.main()