Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 27 additions & 4 deletions packtools/sps/models/fig.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,12 @@ def file_extension(self):
@property
def graphic_alt_text(self):
"""
Extracts alt-text from graphic within alternatives.
Extracts alt-text from graphic (anywhere in fig, not just in alternatives).

Returns:
str or None: The text content of <alt-text> if present, None otherwise.
"""
graphic = self.element.find(".//alternatives/graphic")
graphic = self.element.find(".//graphic")
if graphic is not None:
alt_text_elem = graphic.find("alt-text")
if alt_text_elem is not None:
Expand All @@ -77,18 +77,39 @@ def graphic_alt_text(self):
@property
def graphic_long_desc(self):
"""
Extracts long-desc from graphic within alternatives.
Extracts long-desc from graphic (anywhere in fig, not just in alternatives).

Returns:
str or None: The text content of <long-desc> if present, None otherwise.
"""
graphic = self.element.find(".//alternatives/graphic")
graphic = self.element.find(".//graphic")
if graphic is not None:
long_desc_elem = graphic.find("long-desc")
if long_desc_elem is not None:
return long_desc_elem.text
return None

@property
def xml_lang(self):
"""
Extracts xml:lang attribute from fig element.

Returns:
str or None: The xml:lang attribute value if present, None otherwise.
"""
return self.element.get("{http://www.w3.org/XML/1998/namespace}lang")

@property
def parent_name(self):
"""
Gets the tag name of the parent element.

Returns:
str or None: The parent tag name if present, None otherwise.
"""
parent = self.element.getparent()
return parent.tag if parent is not None else None

@property
def data(self):
return {
Expand All @@ -103,6 +124,8 @@ def data(self):
"file_extension": self.file_extension,
"graphic_alt_text": self.graphic_alt_text,
"graphic_long_desc": self.graphic_long_desc,
"xml_lang": self.xml_lang,
"parent_name": self.parent_name,
}


Expand Down
236 changes: 188 additions & 48 deletions packtools/sps/validation/fig.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ class ArticleFigValidation:
def __init__(self, xml_tree, rules):
self.xml_tree = xml_tree
self.rules = rules
self.article_types_requires = rules["article_types_requires"]
self.article_types_requires = rules.get("article_types_requires", [])
self.article_type = xml_tree.find(".").get("article-type")
self.required = self.article_type in self.article_types_requires
self.elements = list(ArticleFigs(xml_tree).get_all_figs)
Expand All @@ -15,13 +15,12 @@ def validate(self):
if self.elements:
for element in self.elements:
yield from FigValidation(element, self.rules).validate()

else:
elif self.required:
yield format_response(
title="fig presence",
parent="article",
parent_id=None,
parent_article_type=self.xml_tree.get("article-type"),
parent_article_type=self.article_type,
parent_lang=self.xml_tree.get(
"{http://www.w3.org/XML/1998/namespace}lang"
),
Expand All @@ -31,9 +30,9 @@ def validate(self):
is_valid=False,
expected="<fig>",
obtained=None,
advice=f'({self.article_type}) No <fig> found in XML',
advice=f'article-type={self.article_type} requires <fig>. Found 0. Identify the fig or check if article-type is correct',
data=None,
error_level=self.rules["absent_error_level"],
error_level=self.rules.get("absent_error_level", "WARNING"),
)


Expand All @@ -45,80 +44,221 @@ def __init__(self, data, rules):

def get_default_params(self):
return {
"alternatives_error_level": "CRITICAL",
"error_level": "WARNING",
"required_error_level": "CRITICAL",
"absent_error_level": "WARNING",
"id_error_level": "CRITICAL",
"label_error_level": "CRITICAL",
"caption_error_level": "CRITICAL",
"content_error_level": "CRITICAL",
"file_extension_error_level": "CRITICAL",
"allowed file extensions": ["tif", "jpg", "png", "svg"]
"graphic_error_level": "CRITICAL",
"xlink_href_error_level": "CRITICAL",
"file_extension_error_level": "ERROR",
"fig_type_error_level": "ERROR",
"xml_lang_in_fig_group_error_level": "ERROR",
"accessibility_error_level": "WARNING",
"alt_text_length_error_level": "WARNING",
"allowed_file_extensions": ["jpg", "jpeg", "png", "tif", "tiff"],
"allowed_fig_types": ["graphic", "chart", "diagram", "drawing", "illustration", "map"],
"alt_text_max_length": 120,
"article_types_requires": []
}

def validate(self):
yield from self._validate_item("id")
yield from self._validate_item("label")
yield from self._validate_item("caption")
yield from self.validate_content()
yield from self.validate_file_extension()

def _validate_item(self, name):
obtained = self.data.get(name)
# P0 - Critical: Rule 1 - Validate @id presence
yield self.validate_id()

# P0 - Critical: Rule 2 - Validate <graphic> presence
yield self.validate_graphic()

# P0 - Critical: Rule 3 - Validate @xlink:href in <graphic>
yield self.validate_xlink_href()

# P0 - ERROR: Rule 4 - Validate file extension
yield self.validate_file_extension()

# P0 - ERROR: Rule 5 - Validate @fig-type values (only if present)
if self.data.get("type"):
yield self.validate_fig_type()

# P1 - ERROR: Rule 6 - Validate @xml:lang in fig-group (only if in fig-group)
if self.data.get("parent_name") == "fig-group":
yield self.validate_xml_lang_in_fig_group()

# P1 - WARNING: Rule 7 - Validate accessibility (alt-text or long-desc)
yield self.validate_accessibility()

# P1 - WARNING: Rule 8 - Validate alt-text length (only if alt-text present)
if self.data.get("graphic_alt_text"):
yield self.validate_alt_text_length()

def validate_id(self):
"""Rule 1: Validate presence of @id (CRITICAL)"""
obtained = self.data.get("id")
is_valid = bool(obtained)
key_error_level = f"{name}_error_level"
advice = f'Mark each figure {name} inside <body> using <fig><{name}>. Consult SPS documentation for more detail.'
yield build_response(
title=name,
return build_response(
title="@id",
parent=self.data,
item="fig",
sub_item=name,
sub_item="@id",
validation_type="exist",
is_valid=is_valid,
expected=name,
expected="@id attribute",
obtained=obtained,
advice=advice,
advice='Add @id attribute to <fig>. Example: <fig id="f01">. The @id attribute is mandatory.',
data=self.data,
error_level=self.rules["id_error_level"],
)

def validate_graphic(self):
"""Rule 2: Validate presence of <graphic> (CRITICAL)"""
obtained = self.data.get("graphic")
is_valid = bool(obtained)
return build_response(
title="<graphic>",
parent=self.data,
item="fig",
sub_item="graphic",
validation_type="exist",
is_valid=is_valid,
expected="<graphic> element",
obtained=obtained,
advice='Add <graphic> element inside <fig>. Example: <graphic xlink:href="image.jpg"/>. Every <fig> must contain at least one <graphic> element.',
data=self.data,
error_level=self.rules[key_error_level],
error_level=self.rules["graphic_error_level"],
)

def validate_content(self):
is_valid = bool(self.data.get("graphic") or self.data.get("alternatives"))
name = "graphic or alternatives"
yield build_response(
title=name,
def validate_xlink_href(self):
"""Rule 3: Validate @xlink:href in <graphic> (CRITICAL)"""
graphic = self.data.get("graphic")
is_valid = bool(graphic)
return build_response(
title="@xlink:href",
parent=self.data,
item="fig",
sub_item=name,
sub_item="@xlink:href",
validation_type="exist",
is_valid=is_valid,
expected=name,
obtained=None,
advice='Ensure that the figure contains either <graphic> or <alternatives> inside <fig>. Consult SPS documentation for more detail.',
expected="@xlink:href attribute in <graphic>",
obtained=graphic,
advice='Add @xlink:href attribute to <graphic>. Example: <graphic xlink:href="image.jpg"/>. The @xlink:href attribute is mandatory in <graphic>.',
data=self.data,
error_level=self.rules["content_error_level"],
error_level=self.rules["xlink_href_error_level"],
)

def validate_file_extension(self):
"""Rule 4: Validate file extension (ERROR)"""
file_extension = self.data.get("file_extension")
file_path = self.data.get('graphic')
allowed_file_extensions = self.rules["allowed file extensions"]
is_valid = file_extension in allowed_file_extensions
if file_extension:
advice = f'In <fig><graphic xlink:href="{file_path}"/> replace {file_extension} with one of {allowed_file_extensions}'
graphic_href = self.data.get("graphic")
allowed_extensions = self.rules["allowed_file_extensions"]
alternative_elements = self.data.get("alternative_elements", [])

# Check if file is SVG and if it's inside alternatives
is_svg = file_extension == "svg"
is_in_alternatives = len(alternative_elements) > 0

# SVG is only allowed inside alternatives
if is_svg:
is_valid = is_in_alternatives
if not is_valid:
advice = f'SVG files are only allowed inside <alternatives>. Either use a different format ({", ".join(allowed_extensions)}) or wrap the graphic in <alternatives>.'
else:
advice = None
else:
advice = f'In <fig><graphic xlink:href="{file_path}"/> specify a valid file extension from: {allowed_file_extensions}'
yield build_response(
is_valid = file_extension in allowed_extensions if file_extension else False
if file_extension and not is_valid:
advice = f'File extension "{file_extension}" is not allowed. Use one of: {", ".join(allowed_extensions)}. If using SVG, it must be inside <alternatives>.'
elif not file_extension:
advice = f'File "{graphic_href}" must have a valid extension. Allowed: {", ".join(allowed_extensions)}. SVG is only allowed inside <alternatives>.'
else:
advice = None

return build_response(
title="file extension",
parent=self.data,
item="fig",
sub_item="file extension",
validation_type="value in list",
is_valid=is_valid,
expected=allowed_file_extensions,
expected=f'{", ".join(allowed_extensions)} (.svg only in <alternatives>)',
obtained=file_extension,
advice=advice,
data=self.data,
error_level=self.rules["file_extension_error_level"],
)

def validate_fig_type(self):
"""Rule 5: Validate @fig-type values (ERROR)"""
fig_type = self.data.get("type")
allowed_types = self.rules["allowed_fig_types"]
is_valid = fig_type in allowed_types

return build_response(
title="@fig-type",
parent=self.data,
item="fig",
sub_item="@fig-type",
validation_type="value in list",
is_valid=is_valid,
expected=f'one of {allowed_types}',
obtained=fig_type,
advice=f'Invalid @fig-type value "{fig_type}". Use one of: {", ".join(allowed_types)}.',
data=self.data,
error_level=self.rules["fig_type_error_level"],
)

def validate_xml_lang_in_fig_group(self):
"""Rule 6: Validate @xml:lang in <fig> inside <fig-group> (ERROR)"""
xml_lang = self.data.get("xml_lang")
is_valid = bool(xml_lang)

return build_response(
title="@xml:lang in fig-group",
parent=self.data,
item="fig",
sub_item="@xml:lang",
validation_type="exist",
is_valid=is_valid,
expected="@xml:lang attribute",
obtained=xml_lang,
advice='When <fig> is inside <fig-group>, the @xml:lang attribute is mandatory. Add xml:lang attribute to <fig>. Example: <fig xml:lang="en">.',
data=self.data,
error_level=self.rules["xml_lang_in_fig_group_error_level"],
)

def validate_accessibility(self):
"""Rule 7: Validate presence of alt-text or long-desc (WARNING)"""
alt_text = self.data.get("graphic_alt_text")
long_desc = self.data.get("graphic_long_desc")
has_accessibility = bool(alt_text or long_desc)

return build_response(
title="accessibility",
parent=self.data,
item="fig",
sub_item="alt-text or long-desc",
validation_type="exist",
is_valid=has_accessibility,
expected="<alt-text> or <long-desc>",
obtained="present" if has_accessibility else None,
advice='For accessibility, add <alt-text> or <long-desc> inside <graphic>. Example: <graphic xlink:href="image.jpg"><alt-text>Brief description</alt-text></graphic>.',
data=self.data,
error_level=self.rules["accessibility_error_level"],
)

def validate_alt_text_length(self):
"""Rule 8: Validate alt-text character limit (WARNING)"""
alt_text = self.data.get("graphic_alt_text")
max_length = self.rules["alt_text_max_length"]
current_length = len(alt_text)
is_valid = current_length <= max_length

return build_response(
title="alt-text length",
parent=self.data,
item="fig",
sub_item="alt-text length",
validation_type="format",
is_valid=is_valid,
expected=f"≤ {max_length} characters",
obtained=f"{current_length} characters",
advice=f'The <alt-text> content has {current_length} characters, exceeding the recommended maximum of {max_length}. Please shorten the description.',
data=self.data,
error_level=self.rules["alt_text_length_error_level"],
)

21 changes: 12 additions & 9 deletions packtools/sps/validation_rules/fig_rules.json
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
{
"fig_rules": {
"alternatives_error_level": "CRITICAL",
"error_level": "WARNING",
"required_error_level": "CRITICAL",
"absent_error_level": "WARNING",
"id_error_level": "CRITICAL",
"label_error_level": "CRITICAL",
"caption_error_level": "CRITICAL",
"content_error_level": "CRITICAL",
"file_extension_error_level": "CRITICAL",
"allowed file extensions": ["tif", "jpg", "png", "svg"]
"graphic_error_level": "CRITICAL",
"xlink_href_error_level": "CRITICAL",
"file_extension_error_level": "ERROR",
"fig_type_error_level": "ERROR",
"xml_lang_in_fig_group_error_level": "ERROR",
"accessibility_error_level": "WARNING",
"alt_text_length_error_level": "WARNING",
"allowed_file_extensions": ["jpg", "jpeg", "png", "tif", "tiff"],
"allowed_fig_types": ["graphic", "chart", "diagram", "drawing", "illustration", "map"],
"alt_text_max_length": 120,
"article_types_requires": ["research-article"]
}
}
}
Loading