From 84f51d91390b6b72d2a8cd222f2896b8c8982b7f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Feb 2026 12:12:59 +0000 Subject: [PATCH 1/3] Initial plan From bcfeb947d2da86eaf95c11e1e8629729c2cf3adc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Feb 2026 12:41:43 +0000 Subject: [PATCH 2/3] Implement list element validation with model, validation and tests Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- packtools/sps/models/list.py | 112 ++++++++ packtools/sps/validation/list.py | 216 +++++++++++++++ src/scielo-scholarly-data | 1 + tests/sps/validation/test_list.py | 420 ++++++++++++++++++++++++++++++ 4 files changed, 749 insertions(+) create mode 100644 packtools/sps/models/list.py create mode 100644 packtools/sps/validation/list.py create mode 160000 src/scielo-scholarly-data create mode 100644 tests/sps/validation/test_list.py diff --git a/packtools/sps/models/list.py b/packtools/sps/models/list.py new file mode 100644 index 000000000..10cc7474f --- /dev/null +++ b/packtools/sps/models/list.py @@ -0,0 +1,112 @@ +from packtools.sps.utils.xml_utils import put_parent_context, tostring + + +class List: + def __init__(self, element): + self.element = element + + def __str__(self): + return tostring(self.element, xml_declaration=False) + + def xml(self, pretty_print=True): + return tostring( + node=self.element, + doctype=None, + pretty_print=pretty_print, + xml_declaration=False, + ) + + @property + def list_type(self): + """Returns the value of @list-type attribute""" + return self.element.get("list-type") + + @property + def title(self): + """Returns the text content of element if present""" + return self.element.findtext("title") + + @property + def list_items(self): + """Returns all <list-item> elements""" + return self.element.findall("list-item") + + @property + def list_items_count(self): + """Returns the count of <list-item> elements""" + return len(self.list_items) + + @property + def has_label_in_items(self): + """Checks if any <list-item> contains a <label> element""" + for item in self.list_items: + if item.find("label") is not None: + return True + return False + + @property + def empty_list_items(self): + """Returns list of <list-item> elements that have no content""" + empty_items = [] + for item in self.list_items: + # Check if list-item has any child elements or text content + has_content = len(item) > 0 or (item.text and item.text.strip()) + if not has_content: + empty_items.append(item) + return empty_items + + @property + def data(self): + """Returns a dictionary with list data for validation""" + return { + "list_type": self.list_type, + "title": self.title, + "list_items_count": self.list_items_count, + "has_label_in_items": self.has_label_in_items, + "empty_list_items_count": len(self.empty_list_items), + } + + +class Lists: + def __init__(self, node): + self.node = node + self.parent = self.node.tag + self.parent_id = self.node.get("id") + self.lang = self.node.get("{http://www.w3.org/XML/1998/namespace}lang") + self.article_type = self.node.get("article-type") + + def lists(self): + if self.parent == "article": + path = "./front//list | ./body//list | ./back//list" + else: + path = ".//list" + + for list_element in self.node.xpath(path): + data = List(list_element).data + yield put_parent_context(data, self.lang, self.article_type, self.parent, self.parent_id) + + +class ArticleLists: + def __init__(self, xml_tree): + self.xml_tree = xml_tree + + @property + def get_all_lists(self): + """Returns all <list> elements in the document""" + yield from self.get_article_lists + yield from self.get_sub_article_translation_lists + yield from self.get_sub_article_non_translation_lists + + @property + def get_article_lists(self): + yield from Lists(self.xml_tree.find(".")).lists() + + @property + def get_sub_article_translation_lists(self): + for node in self.xml_tree.xpath(".//sub-article[@article-type='translation']"): + yield from Lists(node).lists() + + @property + def get_sub_article_non_translation_lists(self): + for node in self.xml_tree.xpath(".//sub-article[@article-type!='translation']"): + yield from Lists(node).lists() diff --git a/packtools/sps/validation/list.py b/packtools/sps/validation/list.py new file mode 100644 index 000000000..a61b066b7 --- /dev/null +++ b/packtools/sps/validation/list.py @@ -0,0 +1,216 @@ +from packtools.sps.models.list import ArticleLists +from packtools.sps.validation.utils import build_response + + +class ArticleListValidation: + def __init__(self, xml_tree, rules): + self.xml_tree = xml_tree + self.rules = rules + self.elements = list(ArticleLists(xml_tree).get_all_lists) + + def validate(self): + if self.elements: + for element in self.elements: + yield from ListValidation(element, self.rules).validate() + + +class ListValidation: + def __init__(self, data, rules): + self.data = data + self.rules = self.get_default_params() + self.rules.update(rules or {}) + + def get_default_params(self): + return { + "list_type_presence_error_level": "CRITICAL", + "list_type_value_error_level": "ERROR", + "min_list_items_error_level": "ERROR", + "label_in_list_item_error_level": "WARNING", + "empty_list_item_error_level": "WARNING", + "missing_title_error_level": "INFO", + "allowed_list_types": [ + "order", + "bullet", + "alpha-lower", + "alpha-upper", + "roman-lower", + "roman-upper", + "simple", + ], + "min_list_items": 2, + } + + def validate(self): + yield from self.validate_list_type_presence() + yield from self.validate_list_type_value() + yield from self.validate_min_list_items() + yield from self.validate_no_label_in_list_items() + yield from self.validate_list_items_have_content() + yield from self.validate_title_recommendation() + + def validate_list_type_presence(self): + """ + P0 - Rule 1: Validate presence of @list-type attribute (CRITICAL) + """ + list_type = self.data.get("list_type") + is_valid = list_type is not None and list_type != "" + + if list_type is None: + obtained = None + advice = "Add list-type attribute to <list> element. Use one of: order, bullet, alpha-lower, alpha-upper, roman-lower, roman-upper, simple" + elif list_type == "": + obtained = '""' + advice = "The @list-type attribute cannot be empty. Use one of: order, bullet, alpha-lower, alpha-upper, roman-lower, roman-upper, simple" + else: + obtained = list_type + advice = None + + yield build_response( + title="@list-type presence", + parent=self.data, + item="list", + sub_item="@list-type", + validation_type="exist", + is_valid=is_valid, + expected="@list-type", + obtained=obtained, + advice=advice, + data=self.data, + error_level=self.rules["list_type_presence_error_level"], + ) + + def validate_list_type_value(self): + """ + P0 - Rule 2: Validate allowed values of @list-type (ERROR) + """ + list_type = self.data.get("list_type") + allowed_list_types = self.rules["allowed_list_types"] + + # Only validate value if list_type is present and not empty + if list_type and list_type != "": + is_valid = list_type in allowed_list_types + + if not is_valid: + advice = f'Value "{list_type}" is not allowed for @list-type. Use one of: {", ".join(allowed_list_types)}' + else: + advice = None + + yield build_response( + title="@list-type value", + parent=self.data, + item="list", + sub_item="@list-type", + validation_type="value in list", + is_valid=is_valid, + expected=allowed_list_types, + obtained=list_type, + advice=advice, + data=self.data, + error_level=self.rules["list_type_value_error_level"], + ) + + def validate_min_list_items(self): + """ + P0 - Rule 3: Validate minimum presence of <list-item> (ERROR) + """ + list_items_count = self.data.get("list_items_count", 0) + min_items = self.rules["min_list_items"] + is_valid = list_items_count >= min_items + + if not is_valid: + advice = f"<list> must contain at least {min_items} <list-item> elements. Found {list_items_count}" + else: + advice = None + + yield build_response( + title="minimum list items", + parent=self.data, + item="list", + sub_item="list-item count", + validation_type="exist", + is_valid=is_valid, + expected=f"at least {min_items} list-item elements", + obtained=f"{list_items_count} list-item elements", + advice=advice, + data=self.data, + error_level=self.rules["min_list_items_error_level"], + ) + + def validate_no_label_in_list_items(self): + """ + P0 - Rule 4: Validate absence of <label> in <list-item> (WARNING) + """ + has_label = self.data.get("has_label_in_items", False) + is_valid = not has_label + + if not is_valid: + advice = "For accessibility, do not use <label> in <list-item>. The @list-type attribute generates labels automatically" + else: + advice = None + + yield build_response( + title="no label in list-item", + parent=self.data, + item="list", + sub_item="label", + validation_type="exist", + is_valid=is_valid, + expected="no <label> in <list-item>", + obtained="<label> found in <list-item>" if has_label else "no <label>", + advice=advice, + data=self.data, + error_level=self.rules["label_in_list_item_error_level"], + ) + + def validate_list_items_have_content(self): + """ + P1 - Rule 5: Validate that each <list-item> has content (WARNING) + """ + empty_count = self.data.get("empty_list_items_count", 0) + is_valid = empty_count == 0 + + if not is_valid: + advice = f"Found {empty_count} empty <list-item> element(s). Each <list-item> should contain at least one child element (typically <p>)" + else: + advice = None + + yield build_response( + title="list-item has content", + parent=self.data, + item="list", + sub_item="list-item content", + validation_type="exist", + is_valid=is_valid, + expected="all list-item elements with content", + obtained=f"{empty_count} empty list-item(s)" if not is_valid else "all list-items have content", + advice=advice, + data=self.data, + error_level=self.rules["empty_list_item_error_level"], + ) + + def validate_title_recommendation(self): + """ + P1 - Rule 6: Recommend use of <title> when appropriate (INFO) + """ + title = self.data.get("title") + has_title = title is not None and title.strip() != "" + is_valid = True # This is just a recommendation, not a strict requirement + + if not has_title: + advice = "Consider adding a <title> element if the list has a descriptive heading" + else: + advice = None + + yield build_response( + title="list title recommendation", + parent=self.data, + item="list", + sub_item="title", + validation_type="exist", + is_valid=is_valid, + expected="<title> when list has a descriptive heading", + obtained="<title> present" if has_title else "no <title>", + advice=advice, + data=self.data, + error_level=self.rules["missing_title_error_level"], + ) diff --git a/src/scielo-scholarly-data b/src/scielo-scholarly-data new file mode 160000 index 000000000..a2899ce8a --- /dev/null +++ b/src/scielo-scholarly-data @@ -0,0 +1 @@ +Subproject commit a2899ce8a1fa77396c516640d36686351210d606 diff --git a/tests/sps/validation/test_list.py b/tests/sps/validation/test_list.py new file mode 100644 index 000000000..689f25f0e --- /dev/null +++ b/tests/sps/validation/test_list.py @@ -0,0 +1,420 @@ +import unittest +from lxml import etree + +from packtools.sps.validation.list import ArticleListValidation + + +class ListValidationTest(unittest.TestCase): + def test_list_validation_valid_bullet_list(self): + """Test valid bullet list with all required attributes""" + self.maxDiff = None + xml_tree = etree.fromstring( + '<article xmlns:xlink="http://www.w3.org/1999/xlink" ' + 'dtd-version="1.0" article-type="research-article" xml:lang="pt">' + "<body>" + '<list list-type="bullet">' + " <title>Nam commodo" + " " + "

Morbi luctus elit enim.

" + "
" + " " + "

Nullam nunc leo.

" + "
" + "" + "" + "" + ) + obtained = list( + ArticleListValidation( + xml_tree, + { + "list_type_presence_error_level": "CRITICAL", + "list_type_value_error_level": "ERROR", + "min_list_items_error_level": "ERROR", + "label_in_list_item_error_level": "WARNING", + "empty_list_item_error_level": "WARNING", + "missing_title_error_level": "INFO", + }, + ).validate() + ) + + # All validations should pass (response = "OK") + for item in obtained: + with self.subTest(item["title"]): + self.assertEqual(item["response"], "OK") + + def test_list_validation_missing_list_type(self): + """Test list without @list-type attribute (CRITICAL error)""" + self.maxDiff = None + xml_tree = etree.fromstring( + '
' + "" + "" + " " + "

Item 1.

" + "
" + " " + "

Item 2.

" + "
" + "
" + "" + "
" + ) + obtained = list( + ArticleListValidation( + xml_tree, + { + "list_type_presence_error_level": "CRITICAL", + "list_type_value_error_level": "ERROR", + "min_list_items_error_level": "ERROR", + "label_in_list_item_error_level": "WARNING", + }, + ).validate() + ) + + # Check that list_type presence validation failed + list_type_validation = [ + item for item in obtained if item["title"] == "@list-type presence" + ][0] + self.assertEqual(list_type_validation["response"], "CRITICAL") + self.assertIsNone(list_type_validation["got_value"]) + self.assertIn("Add list-type attribute", list_type_validation["advice"]) + + def test_list_validation_empty_list_type(self): + """Test list with empty @list-type attribute (CRITICAL error)""" + self.maxDiff = None + xml_tree = etree.fromstring( + '
' + "" + '' + " " + "

Item 1.

" + "
" + " " + "

Item 2.

" + "
" + "
" + "" + "
" + ) + obtained = list( + ArticleListValidation( + xml_tree, + { + "list_type_presence_error_level": "CRITICAL", + "list_type_value_error_level": "ERROR", + }, + ).validate() + ) + + # Check that list_type presence validation failed + list_type_validation = [ + item for item in obtained if item["title"] == "@list-type presence" + ][0] + self.assertEqual(list_type_validation["response"], "CRITICAL") + self.assertEqual(list_type_validation["got_value"], '""') + self.assertIn("cannot be empty", list_type_validation["advice"]) + + def test_list_validation_invalid_list_type_value(self): + """Test list with invalid @list-type value (ERROR)""" + self.maxDiff = None + xml_tree = etree.fromstring( + '
' + "" + '' + " " + "

Item 1.

" + "
" + " " + "

Item 2.

" + "
" + "
" + "" + "
" + ) + obtained = list( + ArticleListValidation( + xml_tree, + { + "list_type_presence_error_level": "CRITICAL", + "list_type_value_error_level": "ERROR", + }, + ).validate() + ) + + # Check that list_type value validation failed + list_type_value_validation = [ + item for item in obtained if item["title"] == "@list-type value" + ][0] + self.assertEqual(list_type_value_validation["response"], "ERROR") + self.assertEqual(list_type_value_validation["got_value"], "numbered") + self.assertIn('"numbered" is not allowed', list_type_value_validation["advice"]) + + def test_list_validation_only_one_list_item(self): + """Test list with only one (ERROR)""" + self.maxDiff = None + xml_tree = etree.fromstring( + '
' + "" + '' + " " + "

Only one item.

" + "
" + "
" + "" + "
" + ) + obtained = list( + ArticleListValidation( + xml_tree, + { + "min_list_items_error_level": "ERROR", + }, + ).validate() + ) + + # Check that min list items validation failed + min_items_validation = [ + item for item in obtained if item["title"] == "minimum list items" + ][0] + self.assertEqual(min_items_validation["response"], "ERROR") + self.assertIn("at least 2", min_items_validation["advice"]) + + def test_list_validation_with_label_in_list_item(self): + """Test list with