From 49e3e00f038de8c09ba1b6ae2eaa17706bf00d4f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Feb 2026 12:11:51 +0000 Subject: [PATCH 1/4] Initial plan From 4580b4fe9f387341323cedaf4b68f44e54c2cfd3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Feb 2026 12:26:39 +0000 Subject: [PATCH 2/4] Analysis complete: identified gaps in graphic validation Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- src/scielo-scholarly-data | 1 + 1 file changed, 1 insertion(+) create mode 160000 src/scielo-scholarly-data diff --git a/src/scielo-scholarly-data b/src/scielo-scholarly-data new file mode 160000 index 000000000..a2899ce8a --- /dev/null +++ b/src/scielo-scholarly-data @@ -0,0 +1 @@ +Subproject commit a2899ce8a1fa77396c516640d36686351210d606 From 18d742ccfe2758d114d0ee7172f9238cd5e160d0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Feb 2026 12:28:50 +0000 Subject: [PATCH 3/4] Implement graphic and inline-graphic validation per SPS 1.10 Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- .gitignore | 3 + packtools/sps/validation/graphic.py | 93 +++++- tests/sps/validation/test_graphic.py | 431 +++++++++++++++++++++++++++ 3 files changed, 526 insertions(+), 1 deletion(-) create mode 100644 tests/sps/validation/test_graphic.py diff --git a/.gitignore b/.gitignore index e330edc72..22cd96b79 100644 --- a/.gitignore +++ b/.gitignore @@ -42,3 +42,6 @@ nosetests.xml .venv .idea + +# Source packages +src/ diff --git a/packtools/sps/validation/graphic.py b/packtools/sps/validation/graphic.py index 9d543967e..a24e427ac 100644 --- a/packtools/sps/validation/graphic.py +++ b/packtools/sps/validation/graphic.py @@ -1,5 +1,96 @@ +import os from packtools.sps.validation.visual_resource_base import 
VisualResourceBaseValidation +from packtools.sps.validation.utils import build_response class GraphicValidation(VisualResourceBaseValidation): - ... + """ + Validation class for <graphic> and <inline-graphic> elements according to SPS 1.10. + + Validates: + - @id attribute (required for both <graphic> and <inline-graphic>) + - @xlink:href attribute (required) + - File extensions (.jpg, .jpeg, .png, .tif, .tiff, .svg) + - .svg only allowed inside <alternatives> + - Accessibility elements (<alt-text>, <long-desc>) + """ + + def validate(self): + """Execute all validations for graphic/inline-graphic elements.""" + yield self.validate_id() + yield self.validate_xlink_href() + yield from self.validate_svg_in_alternatives() + yield from self.accessibility_validation.validate() + + def validate_id(self): + """ + Validate @id attribute is present in <graphic> and <inline-graphic>. + + Per SPS 1.10 specification, @id is required for both <graphic> and <inline-graphic> elements. + This overrides the base class behavior which exempts inline-* elements. + """ + xml = self.data.get("xml") + tag = self.data.get("tag") + id_value = self.data.get("id") + + valid = bool(id_value) + elem = xml[:xml.find(">")+1] if xml else None + expected = f"id for {elem}" if not valid else None + + return build_response( + title="@id", + parent=self.data, + item=tag, + sub_item=None, + is_valid=valid, + validation_type="exist", + expected=expected, + obtained=id_value, + advice=f'Add id="" to {xml}' if not valid else None, + error_level=self.params["media_attributes_error_level"], + data=self.data, + ) + + def validate_svg_in_alternatives(self): + """ + Validate that .svg extension is only used when <graphic> is inside <alternatives>.
+ + Per SPS 1.10 specification: + - .svg files are only allowed when the graphic is inside <alternatives> + - Other formats (.jpg, .jpeg, .png, .tif, .tiff) can be used anywhere + + Yields: + dict: Validation response + """ + xlink_href = self.data.get("xlink_href") + parent_tag = self.data.get("parent_tag") + + if not xlink_href: + return + + # Get file extension + _, ext = os.path.splitext(xlink_href) + ext = ext.lower() + + # Check if it's an SVG file + if ext == ".svg": + # SVG is only valid inside <alternatives> + is_valid = parent_tag == "alternatives" + + yield build_response( + title="SVG in alternatives", + parent=self.data, + item=self.data.get("tag"), + sub_item="xlink_href", + is_valid=is_valid, + validation_type="format", + expected="<graphic> with .svg extension inside <alternatives>", + obtained=f"{self.data.get('tag')} with .svg inside <{parent_tag}>", + advice=( + f"SVG files are only allowed inside <alternatives>. " + f"The file '{xlink_href}' is currently in <{parent_tag}>. " + f"Either move this <graphic> inside <alternatives> or use a different format (.jpg, .png, .tif)." + ) if not is_valid else None, + error_level=self.params.get("svg_error_level", "ERROR"), + data=self.data, + ) diff --git a/tests/sps/validation/test_graphic.py b/tests/sps/validation/test_graphic.py new file mode 100644 index 000000000..f83d19b03 --- /dev/null +++ b/tests/sps/validation/test_graphic.py @@ -0,0 +1,431 @@ +""" +Tests for GraphicValidation class according to SPS 1.10.
+ +Tests validation of <graphic> and <inline-graphic> elements including: +- @id attribute (required for both) +- @xlink:href attribute (required) +- File extensions +- SVG only in <alternatives> +- Accessibility elements +""" + +import unittest +from lxml import etree +from packtools.sps.models.graphic import XmlGraphic +from packtools.sps.validation.graphic import GraphicValidation + + +class TestGraphicValidation(unittest.TestCase): + """Test validations for <graphic> and <inline-graphic> elements per SPS 1.10.""" + + def setUp(self): + """Set up validation parameters for each test.""" + self.params = { + "media_attributes_error_level": "CRITICAL", + "xlink_href_error_level": "ERROR", + "valid_extension": ["jpg", "png", "tif", "tiff", "jpeg", "svg"], + "svg_error_level": "ERROR", + "alt_text_exist_error_level": "WARNING", + "alt_text_content_error_level": "CRITICAL", + "alt_text_media_restriction_error_level": "ERROR", + "alt_text_duplication_error_level": "WARNING", + "decorative_alt_text_error_level": "INFO", + "long_desc_exist_error_level": "WARNING", + "long_desc_content_error_level": "CRITICAL", + "long_desc_minimum_length_error_level": "ERROR", + "long_desc_media_restriction_error_level": "ERROR", + "long_desc_duplication_error_level": "WARNING", + "long_desc_occurrence_error_level": "ERROR", + "long_desc_null_incompatibility_error_level": "WARNING", + "xref_transcript_error_level": "WARNING", + "transcript_error_level": "WARNING", + "content_type_error_level": "CRITICAL", + "speaker_speech_error_level": "WARNING", + "structure_error_level": "CRITICAL", + "content_types": ["machine-generated"], + } + + # ========== Tests for @id validation (Rules 1 & 3) ========== + + def test_graphic_with_id_is_valid(self): + """Test that <graphic> with @id attribute passes validation.""" + xml_content = """ +
+ + + + Test + + + +
+ """ + tree = etree.fromstring(xml_content.encode()) + graphics_data = list(XmlGraphic(tree).data) + + validator = GraphicValidation(graphics_data[0], self.params) + result = validator.validate_id() + + self.assertEqual(result["response"], "OK") + self.assertEqual(result["got_value"], "g1") + + def test_graphic_without_id_fails(self): + """Test that without @id attribute fails with CRITICAL error.""" + xml_content = """ +
+ + + + + +
+ """ + tree = etree.fromstring(xml_content.encode()) + graphics_data = list(XmlGraphic(tree).data) + + validator = GraphicValidation(graphics_data[0], self.params) + result = validator.validate_id() + + self.assertEqual(result["response"], "CRITICAL") + self.assertIsNone(result["got_value"]) + self.assertIn("Add id=", result["advice"]) + + def test_inline_graphic_with_id_is_valid(self): + """Test that with @id attribute passes validation.""" + xml_content = """ +
+ +

+ Text with image. +

+ +
+ """ + tree = etree.fromstring(xml_content.encode()) + graphics_data = list(XmlGraphic(tree).data) + + validator = GraphicValidation(graphics_data[0], self.params) + result = validator.validate_id() + + self.assertEqual(result["response"], "OK") + self.assertEqual(result["got_value"], "ig1") + + def test_inline_graphic_without_id_fails(self): + """Test that without @id fails per SPS 1.10 (Rule 3).""" + xml_content = """ +
+ +

+ +

+ +
+ """ + tree = etree.fromstring(xml_content.encode()) + graphics_data = list(XmlGraphic(tree).data) + + validator = GraphicValidation(graphics_data[0], self.params) + result = validator.validate_id() + + self.assertEqual(result["response"], "CRITICAL") + self.assertIsNone(result["got_value"]) + + # ========== Tests for @xlink:href validation (Rules 2 & 4) ========== + + def test_graphic_with_valid_extension_jpg(self): + """Test that with .jpg extension passes validation.""" + xml_content = """ +
+ + + + Test + + + +
+ """ + tree = etree.fromstring(xml_content.encode()) + graphics_data = list(XmlGraphic(tree).data) + + validator = GraphicValidation(graphics_data[0], self.params) + result = validator.validate_xlink_href() + + self.assertEqual(result["response"], "OK") + self.assertEqual(result["got_value"], "image.jpg") + + def test_graphic_with_valid_extension_jpeg(self): + """Test that with .jpeg extension passes validation.""" + xml_content = """ +
+ + + + Test + + + +
+ """ + tree = etree.fromstring(xml_content.encode()) + graphics_data = list(XmlGraphic(tree).data) + + validator = GraphicValidation(graphics_data[0], self.params) + result = validator.validate_xlink_href() + + self.assertEqual(result["response"], "OK") + + def test_graphic_with_valid_extension_png(self): + """Test that with .png extension passes validation.""" + xml_content = """ +
+ + + + Test + + + +
+ """ + tree = etree.fromstring(xml_content.encode()) + graphics_data = list(XmlGraphic(tree).data) + + validator = GraphicValidation(graphics_data[0], self.params) + result = validator.validate_xlink_href() + + self.assertEqual(result["response"], "OK") + + def test_graphic_with_valid_extension_tif(self): + """Test that with .tif extension passes validation.""" + xml_content = """ +
+ + + + Test + + + +
+ """ + tree = etree.fromstring(xml_content.encode()) + graphics_data = list(XmlGraphic(tree).data) + + validator = GraphicValidation(graphics_data[0], self.params) + result = validator.validate_xlink_href() + + self.assertEqual(result["response"], "OK") + + def test_graphic_with_valid_extension_tiff(self): + """Test that with .tiff extension passes validation.""" + xml_content = """ +
+ + + + Test + + + +
+ """ + tree = etree.fromstring(xml_content.encode()) + graphics_data = list(XmlGraphic(tree).data) + + validator = GraphicValidation(graphics_data[0], self.params) + result = validator.validate_xlink_href() + + self.assertEqual(result["response"], "OK") + + def test_inline_graphic_with_valid_extension(self): + """Test that with valid extension passes validation.""" + xml_content = """ +
+ +

+ +

+ +
+ """ + tree = etree.fromstring(xml_content.encode()) + graphics_data = list(XmlGraphic(tree).data) + + validator = GraphicValidation(graphics_data[0], self.params) + result = validator.validate_xlink_href() + + self.assertEqual(result["response"], "OK") + + def test_graphic_with_invalid_extension_fails(self): + """Test that with invalid extension fails validation.""" + xml_content = """ +
+ + + + Test + + + +
+ """ + tree = etree.fromstring(xml_content.encode()) + graphics_data = list(XmlGraphic(tree).data) + + validator = GraphicValidation(graphics_data[0], self.params) + result = validator.validate_xlink_href() + + self.assertEqual(result["response"], "ERROR") + self.assertEqual(result["got_value"], "document.pdf") + + # ========== Tests for SVG in alternatives validation (Rule 7) ========== + + def test_svg_in_alternatives_is_valid(self): + """Test that .svg file inside passes validation.""" + xml_content = """ +
+ + + + + Test + + + Test + + + + +
+ """ + tree = etree.fromstring(xml_content.encode()) + graphics_data = list(XmlGraphic(tree).data) + + # Test first graphic (svg) + validator = GraphicValidation(graphics_data[0], self.params) + results = list(validator.validate_svg_in_alternatives()) + + # Should have one result for svg check + self.assertEqual(len(results), 1) + self.assertEqual(results[0]["response"], "OK") + + def test_svg_not_in_alternatives_fails(self): + """Test that .svg file NOT inside fails validation (Rule 7).""" + xml_content = """ +
+ + + + + +
+ """ + tree = etree.fromstring(xml_content.encode()) + graphics_data = list(XmlGraphic(tree).data) + + validator = GraphicValidation(graphics_data[0], self.params) + results = list(validator.validate_svg_in_alternatives()) + + # Should fail validation + self.assertEqual(len(results), 1) + self.assertEqual(results[0]["response"], "ERROR") + self.assertIn("alternatives", results[0]["advice"].lower()) + + def test_non_svg_not_in_alternatives_is_valid(self): + """Test that non-.svg file NOT in passes validation.""" + xml_content = """ +
+ + + + Test + + + +
+ """ + tree = etree.fromstring(xml_content.encode()) + graphics_data = list(XmlGraphic(tree).data) + + validator = GraphicValidation(graphics_data[0], self.params) + results = list(validator.validate_svg_in_alternatives()) + + # Should not generate any results (no svg to check) + self.assertEqual(len(results), 0) + + def test_svg_case_insensitive(self): + """Test that SVG validation is case-insensitive (.SVG, .Svg, .svg).""" + xml_content = """ +
+ + + + + +
+ """ + tree = etree.fromstring(xml_content.encode()) + graphics_data = list(XmlGraphic(tree).data) + + validator = GraphicValidation(graphics_data[0], self.params) + results = list(validator.validate_svg_in_alternatives()) + + # Should fail even with uppercase .SVG + self.assertEqual(len(results), 1) + self.assertEqual(results[0]["response"], "ERROR") + + # ========== Integration tests ========== + + def test_complete_validation_all_valid(self): + """Test complete validation with all rules passing.""" + xml_content = """ +
+ + + + Description of the image + + + +
+ """ + tree = etree.fromstring(xml_content.encode()) + graphics_data = list(XmlGraphic(tree).data) + + validator = GraphicValidation(graphics_data[0], self.params) + results = [r for r in validator.validate() if r is not None] + + # All critical checks should pass + critical_checks = [r for r in results if r.get("title") in ["@id", "@xlink:href validation"]] + for check in critical_checks: + self.assertEqual(check["response"], "OK") + + def test_complete_validation_multiple_failures(self): + """Test complete validation with multiple rule violations.""" + xml_content = """ +
+ + + + + +
+ """ + tree = etree.fromstring(xml_content.encode()) + graphics_data = list(XmlGraphic(tree).data) + + validator = GraphicValidation(graphics_data[0], self.params) + results = [r for r in validator.validate() if r is not None] + + # Should have multiple failures + failures = [r for r in results if r.get("response") not in ["OK", None]] + + # At minimum: missing @id and svg not in alternatives + failure_titles = [r.get("title") for r in failures] + self.assertIn("@id", failure_titles) + self.assertIn("SVG in alternatives", failure_titles) + + +if __name__ == "__main__": + unittest.main() From 09b420fde5d889b21aa2a4d9ab09a37953374066 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Feb 2026 12:31:27 +0000 Subject: [PATCH 4/4] Address code review feedback: improve test naming consistency Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- tests/sps/validation/test_graphic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/sps/validation/test_graphic.py b/tests/sps/validation/test_graphic.py index f83d19b03..6d86dcb6e 100644 --- a/tests/sps/validation/test_graphic.py +++ b/tests/sps/validation/test_graphic.py @@ -47,7 +47,7 @@ def setUp(self): # ========== Tests for @id validation (Rules 1 & 3) ========== - def test_graphic_with_id_is_valid(self): + def test_graphic_with_id_passes(self): """Test that with @id attribute passes validation.""" xml_content = """
@@ -90,7 +90,7 @@ def test_graphic_without_id_fails(self): self.assertIsNone(result["got_value"]) self.assertIn("Add id=", result["advice"]) - def test_inline_graphic_with_id_is_valid(self): + def test_inline_graphic_with_id_passes(self): """Test that with @id attribute passes validation.""" xml_content = """
@@ -281,7 +281,7 @@ def test_graphic_with_invalid_extension_fails(self): # ========== Tests for SVG in alternatives validation (Rule 7) ========== - def test_svg_in_alternatives_is_valid(self): + def test_svg_in_alternatives_passes(self): """Test that .svg file inside passes validation.""" xml_content = """
@@ -332,7 +332,7 @@ def test_svg_not_in_alternatives_fails(self): self.assertEqual(results[0]["response"], "ERROR") self.assertIn("alternatives", results[0]["advice"].lower()) - def test_non_svg_not_in_alternatives_is_valid(self): + def test_non_svg_not_in_alternatives_passes(self): """Test that non-.svg file NOT in passes validation.""" xml_content = """